diff --git a/BCC-Examples/hello_fields.py b/BCC-Examples/hello_fields.py new file mode 100644 index 00000000..46156c5c --- /dev/null +++ b/BCC-Examples/hello_fields.py @@ -0,0 +1,34 @@ +from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +# header +print(f"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}") + +# format output +while True: + try: + (task, pid, cpu, flags, ts, msg) = trace_fields() + except ValueError: + continue + except KeyboardInterrupt: + exit() + print(f"{ts:<18} {task:<16} {pid:<6} {msg}") diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py new file mode 100644 index 00000000..2bc195d7 --- /dev/null +++ b/BCC-Examples/hello_perf_output.py @@ -0,0 +1,61 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid, comm +from pythonbpf.maps import PerfEventArray +from ctypes import c_void_p, c_int64 + + +@bpf +@struct +class data_t: + pid: c_int64 + ts: c_int64 + comm: str(16) # type: ignore [valid-type] + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_int64, value_size=c_int64) + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello(ctx: c_void_p) -> c_int64: + dataobj = data_t() + dataobj.pid, dataobj.ts = pid(), ktime() + comm(dataobj.comm) + events.output(dataobj) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +start = 0 + + +def callback(cpu, event): + global start + if start == 0: + start = event.ts + ts = (event.ts - start) / 1e9 + print(f"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}") + + +perf = b["events"].open_perf_buffer(callback, struct_name="data_t") +print("Starting to poll... 
(Ctrl+C to stop)") +print("Try running: fork() or clone() system calls to trigger events") + +try: + while True: + b["events"].poll(1000) +except KeyboardInterrupt: + print("Stopping...") diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py new file mode 100644 index 00000000..520f84c4 --- /dev/null +++ b/BCC-Examples/hello_world.py @@ -0,0 +1,23 @@ +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +trace_pipe() diff --git a/BCC-Examples/sync_count.py b/BCC-Examples/sync_count.py new file mode 100644 index 00000000..4eead08e --- /dev/null +++ b/BCC-Examples/sync_count.py @@ -0,0 +1,58 @@ +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap + +from ctypes import c_void_p, c_int64 + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=2) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + ts_key, cnt_key = 0, 1 + tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key) + if not cntp: + last.update(cnt_key, 0) + cntp = last.lookup(cnt_key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + time_ms = delta // 1000000 + print(f"{time_ms} {cntp}") + last.delete(ts_key) + else: + last.update(ts_key, ktime()) + last.update(cnt_key, cntp + 1) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +print("Tracing for quick sync's... Ctrl-C to end") + +# format output +start = 0 +while True: + try: + task, pid, cpu, flags, ts, msg = trace_fields() + if start == 0: + start = ts + ts -= start + ms, cnt = msg.split() + print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago. Count {cnt}") + except KeyboardInterrupt: + exit() diff --git a/BCC-Examples/sync_perf_output.py b/BCC-Examples/sync_perf_output.py new file mode 100644 index 00000000..4b2447af --- /dev/null +++ b/BCC-Examples/sync_perf_output.py @@ -0,0 +1,78 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap +from pythonbpf.maps import PerfEventArray +from ctypes import c_void_p, c_int64 + + +@bpf +@struct +class data_t: + ts: c_int64 + ms: c_int64 + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_int64, value_size=c_int64) + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=1) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + dat, dat.ts, key = data_t(), ktime(), 0 + tsp = last.lookup(key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + dat.ms = delta // 1000000 + events.output(dat) + last.delete(key) + else: + last.update(key, ktime()) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +print("Tracing for quick sync's... 
Ctrl-C to end") + +# format output +start = 0 + + +def callback(cpu, event): + global start + if start == 0: + start = event.ts + event.ts -= start + print( + f"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago" + ) + + +perf = b["events"].open_perf_buffer(callback, struct_name="data_t") +print("Starting to poll... (Ctrl+C to stop)") +print("Try running: fork() or clone() system calls to trigger events") + +try: + while True: + b["events"].poll(1000) +except KeyboardInterrupt: + print("Stopping...") diff --git a/BCC-Examples/sync_timing.py b/BCC-Examples/sync_timing.py new file mode 100644 index 00000000..aba494d8 --- /dev/null +++ b/BCC-Examples/sync_timing.py @@ -0,0 +1,53 @@ +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap + +from ctypes import c_void_p, c_int64 + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=1) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + key = 0 + tsp = last.lookup(key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + time_ms = delta // 1000000 + print(f"{time_ms}") + last.delete(key) + else: + last.update(key, ktime()) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +print("Tracing for quick sync's... Ctrl-C to end") + +# format output +start = 0 +while True: + try: + task, pid, cpu, flags, ts, ms = trace_fields() + if start == 0: + start = ts + ts -= start + print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago") + except KeyboardInterrupt: + exit() diff --git a/BCC-Examples/sys_sync.py b/BCC-Examples/sys_sync.py new file mode 100644 index 00000000..4d7314a4 --- /dev/null +++ b/BCC-Examples/sys_sync.py @@ -0,0 +1,23 @@ +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def hello_world(ctx: c_void_p) -> c_int64: + print("sys_sync() called") + return c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() +print("Tracing sys_sync()... 
Ctrl-C to end.") +trace_pipe() diff --git a/BCC-Examples/vfsreadlat.py b/BCC-Examples/vfsreadlat.py new file mode 100644 index 00000000..9dce0ea3 --- /dev/null +++ b/BCC-Examples/vfsreadlat.py @@ -0,0 +1,127 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, PerfEventArray +from ctypes import c_void_p, c_uint64 +import matplotlib.pyplot as plt +import numpy as np + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 # Latency in microseconds + + +@bpf +@map +def start() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + p, ts = pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + # Only track if latency > 1 microsecond + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Load BPF +print("Loading BPF program...") +b = BPF() +b.load() +b.attach_all() + +# Collect latencies +latencies = [] + + +def callback(cpu, event): + latencies.append(event.delta_us) + + +b["events"].open_perf_buffer(callback, struct_name="latency_event") + +print("Tracing vfs_read latency... Hit Ctrl-C to end.") + +try: + while True: + b["events"].poll(1000) + if len(latencies) > 0 and len(latencies) % 1000 == 0: + print(f"Collected {len(latencies)} samples...") + +except KeyboardInterrupt: + print(f"Collected {len(latencies)} samples. 
Generating histogram...") + +# Create histogram with matplotlib +if latencies: + # Use log scale for better visualization + log_latencies = np.log2(latencies) + + plt.figure(figsize=(12, 6)) + + # Plot 1: Linear histogram + plt.subplot(1, 2, 1) + plt.hist(latencies, bins=50, edgecolor="black", alpha=0.7) + plt.xlabel("Latency (microseconds)") + plt.ylabel("Count") + plt.title("VFS Read Latency Distribution (Linear)") + plt.grid(True, alpha=0.3) + + # Plot 2: Log2 histogram (like BCC) + plt.subplot(1, 2, 2) + plt.hist(log_latencies, bins=50, edgecolor="black", alpha=0.7, color="orange") + plt.xlabel("log2(Latency in µs)") + plt.ylabel("Count") + plt.title("VFS Read Latency Distribution (Log2)") + plt.grid(True, alpha=0.3) + + # Add statistics + print("Statistics:") + print(f" Count: {len(latencies)}") + print(f" Min: {min(latencies)} µs") + print(f" Max: {max(latencies)} µs") + print(f" Mean: {np.mean(latencies):.2f} µs") + print(f" Median: {np.median(latencies):.2f} µs") + print(f" P95: {np.percentile(latencies, 95):.2f} µs") + print(f" P99: {np.percentile(latencies, 99):.2f} µs") + + plt.tight_layout() + plt.savefig("vfs_read_latency.png", dpi=150) + print("Histogram saved to vfs_read_latency.png") + plt.show() +else: + print("No samples collected!") diff --git a/BCC-Examples/vfsreadlat_plotly/bpf_program.py b/BCC-Examples/vfsreadlat_plotly/bpf_program.py new file mode 100644 index 00000000..41c87cc7 --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/bpf_program.py @@ -0,0 +1,101 @@ +"""BPF program for tracing VFS read latency.""" + +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, PerfEventArray +from ctypes import c_void_p, c_uint64 +import argparse +from data_collector import LatencyCollector +from dashboard import LatencyDashboard + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 + + +@bpf +@map +def start() -> HashMap: + """Map to store start timestamps by PID.""" + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + """Perf event array for sending latency events to userspace.""" + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + """Record start time when vfs_read is called.""" + p, ts = pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + """Calculate and record latency when vfs_read returns.""" + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + # Only track latencies > 1 microsecond + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Monitor VFS read latency with live dashboard" + ) + parser.add_argument( + "--host", default="0.0.0.0", help="Dashboard host (default: 0.0.0.0)" + ) + parser.add_argument( + "--port", type=int, default=8050, help="Dashboard port (default: 8050)" + ) + parser.add_argument( + "--buffer", type=int, default=10000, help="Recent data buffer size" + ) + return parser.parse_args() + + +args = parse_args() + +# Load BPF program +print("Loading BPF 
program...") +b = BPF() +b.load() +b.attach_all() +print("✅ BPF program loaded and attached") + +# Setup data collector +collector = LatencyCollector(b, buffer_size=args.buffer) +collector.start() + +# Create and run dashboard +dashboard = LatencyDashboard(collector) +dashboard.run(host=args.host, port=args.port) diff --git a/BCC-Examples/vfsreadlat_plotly/dashboard.py b/BCC-Examples/vfsreadlat_plotly/dashboard.py new file mode 100644 index 00000000..de430400 --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/dashboard.py @@ -0,0 +1,282 @@ +"""Plotly Dash dashboard for visualizing latency data.""" + +import dash +from dash import dcc, html +from dash.dependencies import Input, Output +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import numpy as np + + +class LatencyDashboard: + """Interactive dashboard for latency visualization.""" + + def __init__(self, collector, title: str = "VFS Read Latency Monitor"): + self.collector = collector + self.app = dash.Dash(__name__) + self.app.title = title + self._setup_layout() + self._setup_callbacks() + + def _setup_layout(self): + """Create dashboard layout.""" + self.app.layout = html.Div( + [ + html.H1( + "🔥 VFS Read Latency Dashboard", + style={ + "textAlign": "center", + "color": "#2c3e50", + "marginBottom": 20, + }, + ), + # Stats cards + html.Div( + [ + self._create_stat_card( + "total-samples", "📊 Total Samples", "#3498db" + ), + self._create_stat_card( + "mean-latency", "⚡ Mean Latency", "#e74c3c" + ), + self._create_stat_card( + "p99-latency", "🔥 P99 Latency", "#f39c12" + ), + ], + style={ + "display": "flex", + "justifyContent": "space-around", + "marginBottom": 30, + }, + ), + # Graphs - ✅ Make sure these IDs match the callback outputs + dcc.Graph(id="dual-histogram", style={"height": "450px"}), + dcc.Graph(id="log2-buckets", style={"height": "350px"}), + dcc.Graph(id="timeseries-graph", style={"height": "300px"}), + # Auto-update + dcc.Interval(id="interval-component", interval=1000, n_intervals=0), + ], + style={"padding": 20, "fontFamily": "Arial, sans-serif"}, + ) + + def _create_stat_card(self, id_name: str, title: str, color: str): + """Create a statistics card.""" + return html.Div( + [ + html.H3(title, style={"color": color}), + html.H2(id=id_name, style={"fontSize": 48, "color": "#2c3e50"}), + ], + className="stat-box", + style={ + "background": "white", + "padding": 20, + "borderRadius": 10, + "boxShadow": "0 4px 6px rgba(0,0,0,0.1)", + "textAlign": "center", + "flex": 1, + "margin": "0 10px", + }, + ) + + def _setup_callbacks(self): + """Setup dashboard callbacks.""" + + @self.app.callback( + [ + Output("total-samples", "children"), + Output("mean-latency", "children"), + Output("p99-latency", "children"), + Output("dual-histogram", "figure"), # ✅ Match layout IDs + Output("log2-buckets", "figure"), # ✅ Match layout IDs + Output("timeseries-graph", "figure"), # ✅ Match layout IDs + ], + [Input("interval-component", "n_intervals")], + ) + def update_dashboard(n): + stats = self.collector.get_stats() + + if stats.total == 0: + return self._empty_state() + + return ( + f"{stats.total:,}", + f"{stats.mean:.1f} µs", + f"{stats.p99:.1f} µs", + self._create_dual_histogram(), + self._create_log2_buckets(), + self._create_timeseries(), + ) + + def _empty_state(self): + """Return empty state for dashboard.""" + empty_fig = go.Figure() + empty_fig.update_layout( + title="Waiting for data... 
Generate some disk I/O!", template="plotly_white" + ) + # ✅ Return 6 values (3 stats + 3 figures) + return "0", "0 µs", "0 µs", empty_fig, empty_fig, empty_fig + + def _create_dual_histogram(self) -> go.Figure: + """Create side-by-side linear and log2 histograms.""" + latencies = self.collector.get_all_latencies() + + # Create subplots + fig = make_subplots( + rows=1, + cols=2, + subplot_titles=("Linear Scale", "Log2 Scale"), + horizontal_spacing=0.12, + ) + + # Linear histogram + fig.add_trace( + go.Histogram( + x=latencies, + nbinsx=50, + marker_color="rgb(55, 83, 109)", + opacity=0.75, + name="Linear", + ), + row=1, + col=1, + ) + + # Log2 histogram + log2_latencies = np.log2(latencies + 1) # +1 to avoid log2(0) + fig.add_trace( + go.Histogram( + x=log2_latencies, + nbinsx=30, + marker_color="rgb(243, 156, 18)", + opacity=0.75, + name="Log2", + ), + row=1, + col=2, + ) + + # Update axes + fig.update_xaxes(title_text="Latency (µs)", row=1, col=1) + fig.update_xaxes(title_text="log2(Latency in µs)", row=1, col=2) + fig.update_yaxes(title_text="Count", row=1, col=1) + fig.update_yaxes(title_text="Count", row=1, col=2) + + fig.update_layout( + title_text="📊 Latency Distribution (Linear vs Log2)", + template="plotly_white", + showlegend=False, + height=450, + ) + + return fig + + def _create_log2_buckets(self) -> go.Figure: + """Create bar chart of log2 buckets (like BCC histogram).""" + buckets = self.collector.get_histogram_buckets() + + if not buckets: + fig = go.Figure() + fig.update_layout( + title="🔥 Log2 Histogram - Waiting for data...", template="plotly_white" + ) + return fig + + # Sort buckets + sorted_buckets = sorted(buckets.keys()) + counts = [buckets[b] for b in sorted_buckets] + + # Create labels (e.g., "8-16µs", "16-32µs") + labels = [] + hover_text = [] + for bucket in sorted_buckets: + lower = 2**bucket + upper = 2 ** (bucket + 1) + labels.append(f"{lower}-{upper}") + + # Calculate percentage + total = sum(counts) + pct = (buckets[bucket] / total) * 100 if total > 0 else 0 + hover_text.append( + f"Range: {lower}-{upper} µs
" + f"Count: {buckets[bucket]:,}
" + f"Percentage: {pct:.2f}%" + ) + + # Create bar chart + fig = go.Figure() + + fig.add_trace( + go.Bar( + x=labels, + y=counts, + marker=dict( + color=counts, + colorscale="YlOrRd", + showscale=True, + colorbar=dict(title="Count"), + ), + text=counts, + textposition="outside", + hovertext=hover_text, + hoverinfo="text", + ) + ) + + fig.update_layout( + title="🔥 Log2 Histogram (BCC-style buckets)", + xaxis_title="Latency Range (µs)", + yaxis_title="Count", + template="plotly_white", + height=350, + xaxis=dict(tickangle=-45), + ) + + return fig + + def _create_timeseries(self) -> go.Figure: + """Create time series figure.""" + recent = self.collector.get_recent_latencies() + + if not recent: + fig = go.Figure() + fig.update_layout( + title="⏱️ Real-time Latency - Waiting for data...", + template="plotly_white", + ) + return fig + + times = [d["time"] for d in recent] + lats = [d["latency"] for d in recent] + + fig = go.Figure() + fig.add_trace( + go.Scatter( + x=times, + y=lats, + mode="lines", + line=dict(color="rgb(231, 76, 60)", width=2), + fill="tozeroy", + fillcolor="rgba(231, 76, 60, 0.2)", + ) + ) + + fig.update_layout( + title="⏱️ Real-time Latency (Last 10,000 samples)", + xaxis_title="Time (seconds)", + yaxis_title="Latency (µs)", + template="plotly_white", + height=300, + ) + + return fig + + def run(self, host: str = "0.0.0.0", port: int = 8050, debug: bool = False): + """Run the dashboard server.""" + print(f"\n{'=' * 60}") + print(f"🚀 Dashboard running at: http://{host}:{port}") + print(" Access from your browser to see live graphs") + print( + " Generate disk I/O to see data: dd if=/dev/zero of=/tmp/test bs=1M count=100" + ) + print(f"{'=' * 60}\n") + self.app.run(debug=debug, host=host, port=port) diff --git a/BCC-Examples/vfsreadlat_plotly/data_collector.py b/BCC-Examples/vfsreadlat_plotly/data_collector.py new file mode 100644 index 00000000..711e2f8a --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/data_collector.py @@ -0,0 +1,96 @@ +"""Data collection and management.""" + +import threading +import time +import numpy as np +from collections import deque +from dataclasses import dataclass +from typing import List, Dict + + +@dataclass +class LatencyStats: + """Statistics computed from latency data.""" + + total: int = 0 + mean: float = 0.0 + median: float = 0.0 + min: float = 0.0 + max: float = 0.0 + p95: float = 0.0 + p99: float = 0.0 + + @classmethod + def from_array(cls, data: np.ndarray) -> "LatencyStats": + """Compute stats from numpy array.""" + if len(data) == 0: + return cls() + + return cls( + total=len(data), + mean=float(np.mean(data)), + median=float(np.median(data)), + min=float(np.min(data)), + max=float(np.max(data)), + p95=float(np.percentile(data, 95)), + p99=float(np.percentile(data, 99)), + ) + + +class LatencyCollector: + """Collects and manages latency data from BPF.""" + + def __init__(self, bpf_object, buffer_size: int = 10000): + self.bpf = bpf_object + self.all_latencies: List[float] = [] + self.recent_latencies = deque(maxlen=buffer_size) # type: ignore [var-annotated] + self.start_time = time.time() + self._lock = threading.Lock() + self._poll_thread = None + + def callback(self, cpu: int, event): + """Callback for BPF events.""" + with self._lock: + self.all_latencies.append(event.delta_us) + self.recent_latencies.append( + {"time": time.time() - self.start_time, "latency": event.delta_us} + ) + + def start(self): + """Start collecting data.""" + self.bpf["events"].open_perf_buffer(self.callback, struct_name="latency_event") + + def 
poll_loop(): + while True: + self.bpf["events"].poll(100) + + self._poll_thread = threading.Thread(target=poll_loop, daemon=True) + self._poll_thread.start() + print("✅ Data collection started") + + def get_all_latencies(self) -> np.ndarray: + """Get all latencies as numpy array.""" + with self._lock: + return np.array(self.all_latencies) if self.all_latencies else np.array([]) + + def get_recent_latencies(self) -> List[Dict]: + """Get recent latencies with timestamps.""" + with self._lock: + return list(self.recent_latencies) + + def get_stats(self) -> LatencyStats: + """Compute current statistics.""" + return LatencyStats.from_array(self.get_all_latencies()) + + def get_histogram_buckets(self) -> Dict[int, int]: + """Get log2 histogram buckets.""" + latencies = self.get_all_latencies() + if len(latencies) == 0: + return {} + + log_buckets = np.floor(np.log2(latencies + 1)).astype(int) + buckets = {} # type: ignore [var-annotated] + for bucket in log_buckets: + buckets[bucket] = buckets.get(bucket, 0) + 1 + + return buckets diff --git a/BCC-Examples/vfsreadlat_rich.py b/BCC-Examples/vfsreadlat_rich.py new file mode 100644 index 00000000..a8660acf --- /dev/null +++ b/BCC-Examples/vfsreadlat_rich.py @@ -0,0 +1,178 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, PerfEventArray +from ctypes import c_void_p, c_uint64 + +from rich.console import Console +from rich.live import Live +from rich.table import Table +from rich.panel import Panel +from rich.layout import Layout +import numpy as np +import threading +import time +from collections import Counter + +# ==================== BPF Setup ==================== + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 + + +@bpf +@map +def start() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + p, ts = pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +console = Console() +console.print("[bold green]Loading BPF program...[/]") + +b = BPF() +b.load() +b.attach_all() + +# ==================== Data Collection ==================== + +all_latencies = [] +histogram_buckets = Counter() # type: ignore [var-annotated] + + +def callback(cpu, event): + all_latencies.append(event.delta_us) + # Create log2 bucket + bucket = int(np.floor(np.log2(event.delta_us + 1))) + histogram_buckets[bucket] += 1 + + +b["events"].open_perf_buffer(callback, struct_name="latency_event") + + +def poll_events(): + while True: + b["events"].poll(100) + + +poll_thread = threading.Thread(target=poll_events, daemon=True) +poll_thread.start() + +# ==================== Live Display ==================== + + +def generate_display(): + layout = Layout() + layout.split_column( + Layout(name="header", size=3), + Layout(name="stats", size=8), + Layout(name="histogram", size=20), + ) + + # Header + layout["header"].update( + 
Panel("[bold cyan]🔥 VFS Read Latency Monitor[/]", style="bold white on blue") + ) + + # Stats + if len(all_latencies) > 0: + lats = np.array(all_latencies) + stats_table = Table(show_header=False, box=None, padding=(0, 2)) + stats_table.add_column(style="bold cyan") + stats_table.add_column(style="bold yellow") + + stats_table.add_row("📊 Total Samples:", f"{len(lats):,}") + stats_table.add_row("⚡ Mean Latency:", f"{np.mean(lats):.2f} µs") + stats_table.add_row("📉 Min Latency:", f"{np.min(lats):.2f} µs") + stats_table.add_row("📈 Max Latency:", f"{np.max(lats):.2f} µs") + stats_table.add_row("🎯 P95 Latency:", f"{np.percentile(lats, 95):.2f} µs") + stats_table.add_row("🔥 P99 Latency:", f"{np.percentile(lats, 99):.2f} µs") + + layout["stats"].update( + Panel(stats_table, title="Statistics", border_style="green") + ) + else: + layout["stats"].update( + Panel("[yellow]Waiting for data...[/]", border_style="yellow") + ) + + # Histogram + if histogram_buckets: + hist_table = Table(title="Latency Distribution", box=None) + hist_table.add_column("Range", style="cyan", no_wrap=True) + hist_table.add_column("Count", justify="right", style="yellow") + hist_table.add_column("Distribution", style="green") + + max_count = max(histogram_buckets.values()) + + for bucket in sorted(histogram_buckets.keys()): + count = histogram_buckets[bucket] + lower = 2**bucket + upper = 2 ** (bucket + 1) + + # Create bar + bar_width = int((count / max_count) * 40) + bar = "█" * bar_width + + hist_table.add_row( + f"{lower:5d}-{upper:5d} µs", + f"{count:6d}", + f"[green]{bar}[/] {count / len(all_latencies) * 100:.1f}%", + ) + + layout["histogram"].update(Panel(hist_table, border_style="green")) + + return layout + + +try: + with Live(generate_display(), refresh_per_second=2, console=console) as live: + while True: + time.sleep(0.5) + live.update(generate_display()) +except KeyboardInterrupt: + console.print("\n[bold red]Stopping...[/]") + + if all_latencies: + console.print(f"\n[bold green]✅ Collected {len(all_latencies):,} samples[/]") diff --git a/pythonbpf/__init__.py b/pythonbpf/__init__.py index 022af1ba..5f963394 100644 --- a/pythonbpf/__init__.py +++ b/pythonbpf/__init__.py @@ -1,5 +1,6 @@ from .decorators import bpf, map, section, bpfglobal, struct from .codegen import compile_to_ir, compile, BPF +from .utils import trace_pipe, trace_fields __all__ = [ "bpf", @@ -10,4 +11,6 @@ "compile_to_ir", "compile", "BPF", + "trace_pipe", + "trace_fields", ] diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 3149c752..49c787f4 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -23,53 +23,77 @@ def __iter__(self): yield self.metadata -def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): - """Handle memory allocation for assignment statements.""" - - # Validate assignment - if len(stmt.targets) != 1: - logger.warning("Multi-target assignment not supported, skipping allocation") - return - - target = stmt.targets[0] - - # Skip non-name targets (e.g., struct field assignments) - if isinstance(target, ast.Attribute): - logger.debug(f"Struct field assignment to {target.attr}, no allocation needed") - return - - if not isinstance(target, ast.Name): - logger.warning(f"Unsupported assignment target type: {type(target).__name__}") - return +def create_targets_and_rvals(stmt): + """Create lists of targets and right-hand values from an assignment statement.""" + if isinstance(stmt.targets[0], ast.Tuple): + if not isinstance(stmt.value, ast.Tuple): + 
logger.warning("Mismatched multi-target assignment, skipping allocation") + return [], [] + targets, rvals = stmt.targets[0].elts, stmt.value.elts + if len(targets) != len(rvals): + logger.warning("length of LHS != length of RHS, skipping allocation") + return [], [] + return targets, rvals + return stmt.targets, [stmt.value] - var_name = target.id - rval = stmt.value - # Skip if already allocated - if var_name in local_sym_tab: - logger.debug(f"Variable {var_name} already allocated, skipping") - return +def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): + """Handle memory allocation for assignment statements.""" - # When allocating a variable, check if it's a vmlinux struct type - if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( - stmt.value.id - ): - # Handle vmlinux struct allocation - # This requires more implementation - print(stmt.value) - pass - - # Determine type and allocate based on rval - if isinstance(rval, ast.Call): - _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) - elif isinstance(rval, ast.Constant): - _allocate_for_constant(builder, var_name, rval, local_sym_tab) - elif isinstance(rval, ast.BinOp): - _allocate_for_binop(builder, var_name, local_sym_tab) - else: - logger.warning( - f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" - ) + logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}") + + # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) + targets, rvals = create_targets_and_rvals(stmt) + + for target, rval in zip(targets, rvals): + # Skip non-name targets (e.g., struct field assignments) + if isinstance(target, ast.Attribute): + logger.debug( + f"Struct field assignment to {target.attr}, no allocation needed" + ) + continue + + if not isinstance(target, ast.Name): + logger.warning( + f"Unsupported assignment target type: {type(target).__name__}" + ) + continue + + var_name = target.id + + # Skip if already allocated + if var_name in local_sym_tab: + logger.debug(f"Variable {var_name} already allocated, skipping") + continue + + # When allocating a variable, check if it's a vmlinux struct type + if isinstance( + stmt.value, ast.Name + ) and VmlinuxHandlerRegistry.is_vmlinux_struct(stmt.value.id): + # Handle vmlinux struct allocation + # This requires more implementation + print(stmt.value) + pass + + # Determine type and allocate based on rval + if isinstance(rval, ast.Call): + _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) + elif isinstance(rval, ast.Constant): + _allocate_for_constant(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.BinOp): + _allocate_for_binop(builder, var_name, local_sym_tab) + elif isinstance(rval, ast.Name): + # Variable-to-variable assignment (b = a) + _allocate_for_name(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.Attribute): + # Struct field-to-variable assignment (a = dat.fld) + _allocate_for_attribute( + builder, var_name, rval, local_sym_tab, structs_sym_tab + ) + else: + logger.warning( + f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" + ) def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): @@ -186,3 +210,88 @@ def allocate_temp_pool(builder, max_temps, local_sym_tab): temp_var = builder.alloca(ir.IntType(64), name=temp_name) temp_var.align = 8 local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64)) + + +def _allocate_for_name(builder, var_name, rval, local_sym_tab): + 
"""Allocate memory for variable-to-variable assignment (b = a).""" + source_var = rval.id + + if source_var not in local_sym_tab: + logger.error(f"Source variable '{source_var}' not found in symbol table") + return + + # Get type and metadata from source variable + source_symbol = local_sym_tab[source_var] + + # Allocate with same type and alignment + var = _allocate_with_type(builder, var_name, source_symbol.ir_type) + local_sym_tab[var_name] = LocalSymbol( + var, source_symbol.ir_type, source_symbol.metadata + ) + + logger.info( + f"Pre-allocated {var_name} from {source_var} with type {source_symbol.ir_type}" + ) + + +def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab): + """Allocate memory for struct field-to-variable assignment (a = dat.fld).""" + if not isinstance(rval.value, ast.Name): + logger.warning(f"Complex attribute access not supported for {var_name}") + return + + struct_var = rval.value.id + field_name = rval.attr + + # Validate struct and field + if struct_var not in local_sym_tab: + logger.error(f"Struct variable '{struct_var}' not found") + return + + struct_type = local_sym_tab[struct_var].metadata + if not struct_type or struct_type not in structs_sym_tab: + logger.error(f"Struct type '{struct_type}' not found") + return + + struct_info = structs_sym_tab[struct_type] + if field_name not in struct_info.fields: + logger.error(f"Field '{field_name}' not found in struct '{struct_type}'") + return + + # Get field type + field_type = struct_info.field_type(field_name) + + # Special case: char array -> allocate as i8* pointer instead + if ( + isinstance(field_type, ir.ArrayType) + and isinstance(field_type.element, ir.IntType) + and field_type.element.width == 8 + ): + alloc_type = ir.PointerType(ir.IntType(8)) + logger.info(f"Allocating {var_name} as i8* (pointer to char array)") + else: + alloc_type = field_type + + var = _allocate_with_type(builder, var_name, alloc_type) + local_sym_tab[var_name] = LocalSymbol(var, alloc_type) + + logger.info( + f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}" + ) + + +def _allocate_with_type(builder, var_name, ir_type): + """Allocate variable with appropriate alignment for type.""" + var = builder.alloca(ir_type, name=var_name) + var.align = _get_alignment(ir_type) + return var + + +def _get_alignment(ir_type): + """Get appropriate alignment for IR type.""" + if isinstance(ir_type, ir.IntType): + return ir_type.width // 8 + elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType): + return ir_type.element.width // 8 + else: + return 8 # Default: pointer size diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index ab091415..e0ef2db2 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -2,6 +2,7 @@ import logging from llvmlite import ir from pythonbpf.expr import eval_expr +from pythonbpf.helper import emit_probe_read_kernel_str_call logger = logging.getLogger(__name__) @@ -27,27 +28,82 @@ def handle_struct_field_assignment( # Get field pointer and evaluate value field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name) - val = eval_expr( + field_type = struct_info.field_type(field_name) + val_result = eval_expr( func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab ) - if val is None: + if val_result is None: logger.error(f"Failed to evaluate value for {var_name}.{field_name}") return - # TODO: Handle string assignment to char array (not a priority) - field_type = 
struct_info.field_type(field_name) - if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)): - logger.warning( - f"String to char array assignment not implemented for {var_name}.{field_name}" + val, val_type = val_result + + # Special case: i8* string to [N x i8] char array + if _is_char_array(field_type) and _is_i8_ptr(val_type): + _copy_string_to_char_array( + func, + module, + builder, + val, + field_ptr, + field_type, + local_sym_tab, + map_sym_tab, + structs_sym_tab, ) + logger.info(f"Copied string to char array {var_name}.{field_name}") return - # Store the value - builder.store(val[0], field_ptr) + # Regular assignment + builder.store(val, field_ptr) logger.info(f"Assigned to struct field {var_name}.{field_name}") +def _copy_string_to_char_array( + func, + module, + builder, + src_ptr, + dst_ptr, + array_type, + local_sym_tab, + map_sym_tab, + struct_sym_tab, +): + """Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str""" + + array_size = array_type.count + + # Get pointer to first element: [N x i8]* -> i8* + dst_i8_ptr = builder.gep( + dst_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) + + # Use the shared emitter function + emit_probe_read_kernel_str_call(builder, dst_i8_ptr, array_size, src_ptr) + + +def _is_char_array(ir_type): + """Check if type is [N x i8].""" + return ( + isinstance(ir_type, ir.ArrayType) + and isinstance(ir_type.element, ir.IntType) + and ir_type.element.width == 8 + ) + + +def _is_i8_ptr(ir_type): + """Check if type is i8*.""" + return ( + isinstance(ir_type, ir.PointerType) + and isinstance(ir_type.pointee, ir.IntType) + and ir_type.pointee.width == 8 + ) + + def handle_variable_assignment( func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab ): @@ -71,6 +127,17 @@ def handle_variable_assignment( logger.info(f"Initialized struct {struct_name} for variable {var_name}") return True + # Special case: struct field char array -> pointer + # Handle this before eval_expr to get the pointer, not the value + if isinstance(rval, ast.Attribute) and isinstance(rval.value, ast.Name): + converted_val = _try_convert_char_array_to_ptr( + rval, var_type, builder, local_sym_tab, structs_sym_tab + ) + if converted_val is not None: + builder.store(converted_val, var_ptr) + logger.info(f"Assigned char array pointer to {var_name}") + return True + val_result = eval_expr( func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab ) @@ -106,3 +173,52 @@ def handle_variable_assignment( builder.store(val, var_ptr) logger.info(f"Assigned value to variable {var_name}") return True + + +def _try_convert_char_array_to_ptr( + rval, var_type, builder, local_sym_tab, structs_sym_tab +): + """Try to convert char array field to i8* pointer""" + # Only convert if target is i8* + if not ( + isinstance(var_type, ir.PointerType) + and isinstance(var_type.pointee, ir.IntType) + and var_type.pointee.width == 8 + ): + return None + + struct_var = rval.value.id + field_name = rval.attr + + # Validate struct + if struct_var not in local_sym_tab: + return None + + struct_type = local_sym_tab[struct_var].metadata + if not struct_type or struct_type not in structs_sym_tab: + return None + + struct_info = structs_sym_tab[struct_type] + if field_name not in struct_info.fields: + return None + + field_type = struct_info.field_type(field_name) + + # Check if it's a char array + if not ( + isinstance(field_type, ir.ArrayType) + and isinstance(field_type.element, 
ir.IntType) + and field_type.element.width == 8 + ): + return None + + # Get pointer to struct field + struct_ptr = local_sym_tab[struct_var].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + + # GEP to first element: [N x i8]* -> i8* + return builder.gep( + field_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index e97b1944..17f53932 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -17,7 +17,7 @@ import subprocess import inspect from pathlib import Path -from pylibbpf import BpfProgram +from pylibbpf import BpfObject import tempfile from logging import Logger import logging @@ -70,6 +70,7 @@ def processor(source_code, filename, module): func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) globals_list_creation(tree, module) + return structs_sym_tab, map_sym_tab def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): @@ -95,7 +96,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): True, ) - processor(source, filename, module) + structs_sym_tab, maps_sym_tab = processor(source, filename, module) wchar_size = module.add_metadata( [ @@ -144,7 +145,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): f.write(module_string) f.write("\n") - return output + return output, structs_sym_tab, maps_sym_tab def _run_llc(ll_file, obj_file): @@ -174,7 +175,7 @@ def _run_llc(ll_file, obj_file): return False -def compile(loglevel=logging.INFO) -> bool: +def compile(loglevel=logging.WARNING) -> bool: # Look one level up the stack to the caller of this function caller_frame = inspect.stack()[1] caller_file = Path(caller_frame.filename).resolve() @@ -182,18 +183,19 @@ def compile(loglevel=logging.INFO) -> bool: ll_file = Path("/tmp") / caller_file.with_suffix(".ll").name o_file = caller_file.with_suffix(".o") - success = True - success = ( - compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success + _, structs_sym_tab, maps_sym_tab = compile_to_ir( + str(caller_file), str(ll_file), loglevel=loglevel ) - success = _run_llc(ll_file, o_file) and success + if not _run_llc(ll_file, o_file): + logger.error("Compilation to object file failed.") + return False logger.info(f"Object written to {o_file}") - return success + return True -def BPF(loglevel=logging.INFO) -> BpfProgram: +def BPF(loglevel=logging.WARNING) -> BpfObject: caller_frame = inspect.stack()[1] src = inspect.getsource(caller_frame.frame) with tempfile.NamedTemporaryFile( @@ -206,7 +208,9 @@ def BPF(loglevel=logging.INFO) -> BpfProgram: f.write(src) f.flush() source = f.name - compile_to_ir(source, str(inter.name), loglevel=loglevel) + _, structs_sym_tab, maps_sym_tab = compile_to_ir( + source, str(inter.name), loglevel=loglevel + ) _run_llc(str(inter.name), str(obj_file.name)) - return BpfProgram(str(obj_file.name)) + return BpfObject(str(obj_file.name), structs=structs_sym_tab) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 82433441..e391092b 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -12,7 +12,11 @@ handle_variable_assignment, handle_struct_field_assignment, ) -from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool +from pythonbpf.allocation_pass import ( + handle_assign_allocation, + allocate_temp_pool, + create_targets_and_rvals, +) from .return_utils import handle_none_return, 
handle_xdp_return, is_xdp_name from .function_metadata import get_probe_string, is_global_function, infer_return_type @@ -140,48 +144,43 @@ def handle_assign( ): """Handle assignment statements in the function body.""" - # TODO: Support this later - # GH #37 - if len(stmt.targets) != 1: - logger.error("Multi-target assignment is not supported for now") - return - - target = stmt.targets[0] - rval = stmt.value + # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) + targets, rvals = create_targets_and_rvals(stmt) - if isinstance(target, ast.Name): - # NOTE: Simple variable assignment case: x = 5 - var_name = target.id - result = handle_variable_assignment( - func, - module, - builder, - var_name, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - if not result: - logger.error(f"Failed to handle assignment to {var_name}") - return + for target, rval in zip(targets, rvals): + if isinstance(target, ast.Name): + # NOTE: Simple variable assignment case: x = 5 + var_name = target.id + result = handle_variable_assignment( + func, + module, + builder, + var_name, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + if not result: + logger.error(f"Failed to handle assignment to {var_name}") + continue - if isinstance(target, ast.Attribute): - # NOTE: Struct field assignment case: pkt.field = value - handle_struct_field_assignment( - func, - module, - builder, - target, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - return + if isinstance(target, ast.Attribute): + # NOTE: Struct field assignment case: pkt.field = value + handle_struct_field_assignment( + func, + module, + builder, + target, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + continue - # Unsupported target type - logger.error(f"Unsupported assignment target: {ast.dump(target)}") + # Unsupported target type + logger.error(f"Unsupported assignment target: {ast.dump(target)}") def handle_cond( diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index 9f301b2a..2f9c3473 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,7 +1,7 @@ from .helper_registry import HelperHandlerRegistry from .helper_utils import reset_scratch_pool -from .bpf_helper_handler import handle_helper_call -from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS +from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call +from .helpers import ktime, pid, deref, comm, probe_read_str, XDP_DROP, XDP_PASS # Register the helper handler with expr module @@ -59,9 +59,12 @@ def helper_call_handler( "HelperHandlerRegistry", "reset_scratch_pool", "handle_helper_call", + "emit_probe_read_kernel_str_call", "ktime", "pid", "deref", + "comm", + "probe_read_str", "XDP_DROP", "XDP_PASS", ] diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index 6e296148..78686778 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -7,6 +7,9 @@ get_or_create_ptr_from_arg, get_flags_val, get_data_ptr_and_size, + get_buffer_ptr_and_size, + get_char_array_ptr_and_size, + get_ptr_from_arg, ) from .printk_formatter import simple_string_print, handle_fstring_print @@ -23,7 +26,9 @@ class BPFHelperID(Enum): BPF_KTIME_GET_NS = 5 BPF_PRINTK = 6 BPF_GET_CURRENT_PID_TGID = 14 + BPF_GET_CURRENT_COMM = 16 BPF_PERF_EVENT_OUTPUT = 25 + BPF_PROBE_READ_KERNEL_STR = 115 @HelperHandlerRegistry.register("ktime") @@ -234,6 +239,63 @@ def bpf_map_delete_elem_emitter( return result, None 
+@HelperHandlerRegistry.register("comm") +def bpf_get_current_comm_emitter( + call, + map_ptr, + module, + builder, + func, + local_sym_tab=None, + struct_sym_tab=None, + map_sym_tab=None, +): + """ + Emit LLVM IR for bpf_get_current_comm helper function call. + + Accepts: comm(dataobj.field) or comm(my_buffer) + """ + if not call.args or len(call.args) != 1: + raise ValueError( + f"comm expects exactly one argument (buffer), got {len(call.args)}" + ) + + buf_arg = call.args[0] + + # Extract buffer pointer and size + buf_ptr, buf_size = get_buffer_ptr_and_size( + buf_arg, builder, local_sym_tab, struct_sym_tab + ) + + # Validate it's a char array + if not isinstance( + buf_ptr.type.pointee, ir.ArrayType + ) or buf_ptr.type.pointee.element != ir.IntType(8): + raise ValueError( + f"comm expects a char array buffer, got {buf_ptr.type.pointee}" + ) + + # Cast to void* and call helper + buf_void_ptr = builder.bitcast(buf_ptr, ir.PointerType()) + + fn_type = ir.FunctionType( + ir.IntType(64), + [ir.PointerType(), ir.IntType(32)], + var_arg=False, + ) + fn_ptr = builder.inttoptr( + ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_COMM.value), + ir.PointerType(fn_type), + ) + + result = builder.call( + fn_ptr, [buf_void_ptr, ir.Constant(ir.IntType(32), buf_size)], tail=False + ) + + logger.info(f"Emitted bpf_get_current_comm with {buf_size} byte buffer") + return result, None + + @HelperHandlerRegistry.register("pid") def bpf_get_current_pid_tgid_emitter( call, @@ -309,6 +371,68 @@ def bpf_perf_event_output_handler( return result, None +def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr): + """Emit LLVM IR call to bpf_probe_read_kernel_str""" + + fn_type = ir.FunctionType( + ir.IntType(64), + [ir.PointerType(), ir.IntType(32), ir.PointerType()], + var_arg=False, + ) + fn_ptr = builder.inttoptr( + ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value), + ir.PointerType(fn_type), + ) + + result = builder.call( + fn_ptr, + [ + builder.bitcast(dst_ptr, ir.PointerType()), + ir.Constant(ir.IntType(32), dst_size), + builder.bitcast(src_ptr, ir.PointerType()), + ], + tail=False, + ) + + logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})") + return result + + +@HelperHandlerRegistry.register("probe_read_str") +def bpf_probe_read_kernel_str_emitter( + call, + map_ptr, + module, + builder, + func, + local_sym_tab=None, + struct_sym_tab=None, + map_sym_tab=None, +): + """Emit LLVM IR for bpf_probe_read_kernel_str helper.""" + + if len(call.args) != 2: + raise ValueError( + f"probe_read_str expects 2 args (dst, src), got {len(call.args)}" + ) + + # Get destination buffer (char array -> i8*) + dst_ptr, dst_size = get_char_array_ptr_and_size( + call.args[0], builder, local_sym_tab, struct_sym_tab + ) + + # Get source pointer (evaluate expression) + src_ptr, src_type = get_ptr_from_arg( + call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + # Emit the helper call + result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr) + + logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})") + return result, ir.IntType(64) + + def handle_helper_call( call, module, diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 4b04464c..fdfd4524 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -4,6 +4,7 @@ from llvmlite import ir from pythonbpf.expr import ( get_operand_value, + eval_expr, ) logger = logging.getLogger(__name__) @@ -136,3 
+137,140 @@ def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab): raise NotImplementedError( "Only simple object names are supported as data in perf event output." ) + + +def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab): + """Extract buffer pointer and size from either a struct field or variable.""" + + # Case 1: Struct field (obj.field) + if isinstance(buf_arg, ast.Attribute): + if not isinstance(buf_arg.value, ast.Name): + raise ValueError( + "Only simple struct field access supported (e.g., obj.field)" + ) + + struct_name = buf_arg.value.id + field_name = buf_arg.attr + + # Lookup struct + if not local_sym_tab or struct_name not in local_sym_tab: + raise ValueError(f"Struct '{struct_name}' not found") + + struct_type = local_sym_tab[struct_name].metadata + if not struct_sym_tab or struct_type not in struct_sym_tab: + raise ValueError(f"Struct type '{struct_type}' not found") + + struct_info = struct_sym_tab[struct_type] + + # Get field pointer and type + struct_ptr = local_sym_tab[struct_name].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + field_type = struct_info.field_type(field_name) + + if not isinstance(field_type, ir.ArrayType): + raise ValueError(f"Field '{field_name}' must be an array type") + + return field_ptr, field_type.count + + # Case 2: Variable name + elif isinstance(buf_arg, ast.Name): + var_name = buf_arg.id + + if not local_sym_tab or var_name not in local_sym_tab: + raise ValueError(f"Variable '{var_name}' not found") + + var_ptr = local_sym_tab[var_name].var + var_type = local_sym_tab[var_name].ir_type + + if not isinstance(var_type, ir.ArrayType): + raise ValueError(f"Variable '{var_name}' must be an array type") + + return var_ptr, var_type.count + + else: + raise ValueError( + "comm expects either a struct field (obj.field) or variable name" + ) + + +def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab): + """Get pointer to char array and its size.""" + + # Struct field: obj.field + if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name): + var_name = buf_arg.value.id + field_name = buf_arg.attr + + if not (local_sym_tab and var_name in local_sym_tab): + raise ValueError(f"Variable '{var_name}' not found") + + struct_type = local_sym_tab[var_name].metadata + if not (struct_sym_tab and struct_type in struct_sym_tab): + raise ValueError(f"Struct type '{struct_type}' not found") + + struct_info = struct_sym_tab[struct_type] + if field_name not in struct_info.fields: + raise ValueError(f"Field '{field_name}' not found") + + field_type = struct_info.field_type(field_name) + if not _is_char_array(field_type): + raise ValueError("Expected char array field") + + struct_ptr = local_sym_tab[var_name].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + + # GEP to first element: [N x i8]* -> i8* + buf_ptr = builder.gep( + field_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) + return buf_ptr, field_type.count + + elif isinstance(buf_arg, ast.Name): + # NOTE: We shouldn't be doing this as we can't get size info + var_name = buf_arg.id + if not (local_sym_tab and var_name in local_sym_tab): + raise ValueError(f"Variable '{var_name}' not found") + + var_ptr = local_sym_tab[var_name].var + var_type = local_sym_tab[var_name].ir_type + + if not isinstance(var_type, ir.PointerType) or not isinstance( + var_type.pointee, ir.IntType(8) + ): + raise ValueError("Expected str ptr variable") + + return var_ptr, 256 
# Size unknown for str ptr, using 256 as default + + else: + raise ValueError("Expected struct field or variable name") + + +def _is_char_array(ir_type): + """Check if IR type is [N x i8].""" + return ( + isinstance(ir_type, ir.ArrayType) + and isinstance(ir_type.element, ir.IntType) + and ir_type.element.width == 8 + ) + + +def get_ptr_from_arg( + arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab +): + """Evaluate argument and return pointer value""" + + result = eval_expr( + func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + if not result: + raise ValueError("Failed to evaluate argument") + + val, val_type = result + + if not isinstance(val_type, ir.PointerType): + raise ValueError(f"Expected pointer type, got {val_type}") + + return val, val_type diff --git a/pythonbpf/helper/helpers.py b/pythonbpf/helper/helpers.py index 1a84599b..cb1a8e12 100644 --- a/pythonbpf/helper/helpers.py +++ b/pythonbpf/helper/helpers.py @@ -2,19 +2,31 @@ def ktime(): + """get current ktime""" return ctypes.c_int64(0) def pid(): + """get current process id""" return ctypes.c_int32(0) def deref(ptr): - "dereference a pointer" + """dereference a pointer""" result = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_void_p)).contents.value return result if result is not None else 0 +def comm(buf): + """get current process command name""" + return ctypes.c_int64(0) + + +def probe_read_str(dst, src): + """Safely read a null-terminated string from kernel memory""" + return ctypes.c_int64(0) + + XDP_ABORTED = ctypes.c_int64(0) XDP_DROP = ctypes.c_int64(1) XDP_PASS = ctypes.c_int64(2) diff --git a/pythonbpf/helper/printk_formatter.py b/pythonbpf/helper/printk_formatter.py index 66fcb502..a18f1354 100644 --- a/pythonbpf/helper/printk_formatter.py +++ b/pythonbpf/helper/printk_formatter.py @@ -184,6 +184,15 @@ def _populate_fval(ftype, node, fmt_parts, exprs): raise NotImplementedError( f"Unsupported pointer target type in f-string: {target}" ) + elif isinstance(ftype, ir.ArrayType): + if isinstance(ftype.element, ir.IntType) and ftype.element.width == 8: + # Char array + fmt_parts.append("%s") + exprs.append(node) + else: + raise NotImplementedError( + f"Unsupported array element type in f-string: {ftype.element}" + ) else: raise NotImplementedError(f"Unsupported field type in f-string: {ftype}") @@ -208,44 +217,100 @@ def _create_format_string_global(fmt_str, func, module, builder): def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab): """Evaluate and prepare an expression to use as an arg for bpf_printk.""" - val, _ = eval_expr( - func, - module, - builder, - expr, - local_sym_tab, - None, - struct_sym_tab, + + # Special case: struct field char array needs pointer to first element + char_array_ptr = _get_struct_char_array_ptr( + expr, builder, local_sym_tab, struct_sym_tab ) + if char_array_ptr: + return char_array_ptr - if val: - if isinstance(val.type, ir.PointerType): - target, depth = get_base_type_and_depth(val.type) - if isinstance(target, ir.IntType): - if target.width >= 32: - val = deref_to_depth(func, builder, val, depth) - val = builder.sext(val, ir.IntType(64)) - elif target.width == 8 and depth == 1: - # NOTE: i8* is string, no need to deref - pass + # Regular expression evaluation + val, _ = eval_expr(func, module, builder, expr, local_sym_tab, None, struct_sym_tab) - else: - logger.warning( - "Only int and ptr supported in bpf_printk args. Others default to 0." 
-                    )
-                    val = ir.Constant(ir.IntType(64), 0)
-        elif isinstance(val.type, ir.IntType):
-            if val.type.width < 64:
-                val = builder.sext(val, ir.IntType(64))
-        else:
-            logger.warning(
-                "Only int and ptr supported in bpf_printk args. Others default to 0."
-            )
-            val = ir.Constant(ir.IntType(64), 0)
-        return val
+    if not val:
+        logger.warning("Failed to evaluate expression for bpf_printk, defaulting to 0")
+        return ir.Constant(ir.IntType(64), 0)
+
+    # Convert value to bpf_printk compatible type
+    if isinstance(val.type, ir.PointerType):
+        return _handle_pointer_arg(val, func, builder)
+    elif isinstance(val.type, ir.IntType):
+        return _handle_int_arg(val, builder)
     else:
-        logger.warning(
-            "Failed to evaluate expression for bpf_printk argument. "
-            "It will be converted to 0."
-        )
+        logger.warning(f"Unsupported type {val.type} in bpf_printk, defaulting to 0")
         return ir.Constant(ir.IntType(64), 0)
+
+
+def _get_struct_char_array_ptr(expr, builder, local_sym_tab, struct_sym_tab):
+    """Get pointer to first element of char array in struct field, or None."""
+    if not (isinstance(expr, ast.Attribute) and isinstance(expr.value, ast.Name)):
+        return None
+
+    var_name = expr.value.id
+    field_name = expr.attr
+
+    # Check if it's a valid struct field
+    if not (
+        local_sym_tab
+        and var_name in local_sym_tab
+        and struct_sym_tab
+        and local_sym_tab[var_name].metadata in struct_sym_tab
+    ):
+        return None
+
+    struct_type = local_sym_tab[var_name].metadata
+    struct_info = struct_sym_tab[struct_type]
+
+    if field_name not in struct_info.fields:
+        return None
+
+    field_type = struct_info.field_type(field_name)
+
+    # Check if it's a char array
+    is_char_array = (
+        isinstance(field_type, ir.ArrayType)
+        and isinstance(field_type.element, ir.IntType)
+        and field_type.element.width == 8
+    )
+
+    if not is_char_array:
+        return None
+
+    # Get field pointer and GEP to first element: [N x i8]* -> i8*
+    struct_ptr = local_sym_tab[var_name].var
+    field_ptr = struct_info.gep(builder, struct_ptr, field_name)
+
+    return builder.gep(
+        field_ptr,
+        [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
+        inbounds=True,
+    )
+
+
+def _handle_pointer_arg(val, func, builder):
+    """Convert pointer type for bpf_printk."""
+    target, depth = get_base_type_and_depth(val.type)
+
+    if not isinstance(target, ir.IntType):
+        logger.warning("Only int pointers supported in bpf_printk, defaulting to 0")
+        return ir.Constant(ir.IntType(64), 0)
+
+    # i8* is string - use as-is
+    if target.width == 8 and depth == 1:
+        return val
+
+    # Integer pointers: dereference and sign-extend to i64
+    if target.width >= 32:
+        val = deref_to_depth(func, builder, val, depth)
+        return builder.sext(val, ir.IntType(64))
+
+    logger.warning("Unsupported pointer width in bpf_printk, defaulting to 0")
+    return ir.Constant(ir.IntType(64), 0)
+
+
+def _handle_int_arg(val, builder):
+    """Convert integer type for bpf_printk (sign-extend to i64)."""
+    if val.type.width < 64:
+        return builder.sext(val, ir.IntType(64))
+    return val
diff --git a/pythonbpf/utils.py b/pythonbpf/utils.py
new file mode 100644
index 00000000..1540801f
--- /dev/null
+++ b/pythonbpf/utils.py
@@ -0,0 +1,58 @@
+import subprocess
+
+
+def trace_pipe():
+    """Util to read from the trace pipe."""
+    try:
+        subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"])
+    except KeyboardInterrupt:
+        print("Tracing stopped.")
+    except (FileNotFoundError, PermissionError) as e:
+        print(f"Error accessing trace_pipe: {e}. Try running as root.")
+
+
+def trace_fields():
+    """Parse one line from trace_pipe into (task, pid, cpu, flags, ts, msg)."""
+    with open("/sys/kernel/tracing/trace_pipe", "rb", buffering=0) as f:
+        while True:
+            line = f.readline().rstrip()
+
+            if not line:
+                continue
+
+            # Skip lost event lines
+            if line.startswith(b"CPU:"):
+                continue
+
+            # Parse BCC-style: first 16 bytes = task
+            task = line[:16].lstrip().decode("utf-8")
+            line = line[17:]  # Skip past task field and space
+
+            # Find the colon that ends "pid cpu flags timestamp"
+            ts_end = line.find(b":")
+            if ts_end == -1:
+                raise ValueError("Cannot parse trace line")
+
+            # Split "pid [cpu] flags timestamp"
+            try:
+                parts = line[:ts_end].split()
+                if len(parts) < 4:
+                    raise ValueError("Not enough fields")
+
+                pid = int(parts[0])
+                cpu = parts[1][1:-1]  # Remove brackets from [cpu]
+                cpu = int(cpu)
+                flags = parts[2]
+                ts = float(parts[3])
+            except (ValueError, IndexError):
+                raise ValueError("Cannot parse trace line")
+
+            # Get message: skip ": symbol:" part
+            line = line[ts_end + 1 :]  # Skip first ":"
+            sym_end = line.find(b":")
+            if sym_end != -1:
+                msg = line[sym_end + 2 :].decode("utf-8")  # Skip ": " after symbol
+            else:
+                msg = line.lstrip().decode("utf-8")
+
+            return (task, pid, cpu, flags, ts, msg)
diff --git a/tests/passing_tests/assign/ptr_to_char_array.py b/tests/passing_tests/assign/ptr_to_char_array.py
new file mode 100644
index 00000000..6a090be2
--- /dev/null
+++ b/tests/passing_tests/assign/ptr_to_char_array.py
@@ -0,0 +1,28 @@
+from pythonbpf import bpf, struct, section, bpfglobal
+from pythonbpf.helper import comm
+
+from ctypes import c_void_p, c_int64
+
+
+@bpf
+@struct
+class data_t:
+    comm: str(16)  # type: ignore [valid-type]
+    copp: str(16)  # type: ignore [valid-type]
+
+
+@bpf
+@section("tracepoint/syscalls/sys_enter_clone")
+def hello(ctx: c_void_p) -> c_int64:
+    dataobj = data_t()
+    comm(dataobj.comm)
+    strobj = dataobj.comm
+    dataobj.copp = strobj
+    print(f"clone called by comm {dataobj.copp}")
+    return 0
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
diff --git a/tests/passing_tests/assign/struct_field_to_var_str.py b/tests/passing_tests/assign/struct_field_to_var_str.py
new file mode 100644
index 00000000..1374b7c9
--- /dev/null
+++ b/tests/passing_tests/assign/struct_field_to_var_str.py
@@ -0,0 +1,26 @@
+from pythonbpf import bpf, struct, section, bpfglobal
+from pythonbpf.helper import comm
+
+from ctypes import c_void_p, c_int64
+
+
+@bpf
+@struct
+class data_t:
+    comm: str(16)  # type: ignore [valid-type]
+
+
+@bpf
+@section("tracepoint/syscalls/sys_enter_clone")
+def hello(ctx: c_void_p) -> c_int64:
+    dataobj = data_t()
+    comm(dataobj.comm)
+    strobj = dataobj.comm
+    print(f"clone called by comm {strobj}")
+    return 0
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
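Taken together, the pieces in this patch form one path: comm() fills a char-array struct field inside the BPF program, the printk-formatter changes emit that field with a %s specifier, and trace_fields() parses the resulting trace_pipe line in user space. A minimal sketch of how the three combine, modelled on the tests and BCC-Examples added above (the program name, tracepoint, 16-byte field width, and output formatting here are illustrative assumptions, not part of the patch):

# Illustrative sketch only -- mirrors the examples added in this patch.
from pythonbpf import bpf, struct, section, bpfglobal, BPF, trace_fields
from pythonbpf.helper import comm
from ctypes import c_void_p, c_int64


@bpf
@struct
class data_t:
    comm: str(16)  # type: ignore [valid-type]


@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def show_comm(ctx: c_void_p) -> c_int64:
    dataobj = data_t()
    comm(dataobj.comm)  # fill the char array with the current task's comm
    print(f"clone called by comm {dataobj.comm}")  # char array printed as %s
    return 0  # type: ignore [return-value]


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


b = BPF()
b.load()
b.attach_all()

while True:
    try:
        task, pid, cpu, flags, ts, msg = trace_fields()
        print(f"{ts:<18} {task:<16} {pid:<6} {msg}")
    except ValueError:
        continue  # unparsable trace line, keep polling
    except KeyboardInterrupt:
        break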