From 4cf284a81ff5a1e0eb8a6e43246152d711f2eead Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 00:24:10 +0530 Subject: [PATCH 1/8] provide type as well in eval_expr --- pythonbpf/expr_pass.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pythonbpf/expr_pass.py b/pythonbpf/expr_pass.py index cc969a5..479d153 100644 --- a/pythonbpf/expr_pass.py +++ b/pythonbpf/expr_pass.py @@ -8,15 +8,15 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s if expr.id in local_sym_tab: var = local_sym_tab[expr.id][0] val = builder.load(var) - return val + return val, local_sym_tab[expr.id][1] # return value and type else: print(f"Undefined variable {expr.id}") return None elif isinstance(expr, ast.Constant): if isinstance(expr.value, int): - return ir.Constant(ir.IntType(64), expr.value) + return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64) elif isinstance(expr.value, bool): - return ir.Constant(ir.IntType(1), int(expr.value)) + return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1) else: print("Unsupported constant type") return None @@ -44,8 +44,9 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s if arg is None: print("Failed to evaluate deref argument") return None + # Since we are handling only name case, directly take type from sym tab val = builder.load(arg) - return val + return val, local_sym_tab[expr.args[0].id][1] # check for helpers if expr.func.id in helper_func_list: From 51595f9ec26f2c98281495083798689030d2e6a3 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 00:28:10 +0530 Subject: [PATCH 2/8] Add type returns to bpf helpers --- pythonbpf/bpf_helper_handler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pythonbpf/bpf_helper_handler.py b/pythonbpf/bpf_helper_handler.py index 91dd81e..c6c6947 100644 --- a/pythonbpf/bpf_helper_handler.py +++ 
b/pythonbpf/bpf_helper_handler.py @@ -13,7 +13,7 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab fn_ptr_type = ir.PointerType(fn_type) fn_ptr = builder.inttoptr(helper_id, fn_ptr_type) result = builder.call(fn_ptr, [], tail=False) - return result + return result, ir.IntType(64) def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -60,7 +60,7 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_ result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False) - return result + return result, ir.PointerType() def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None): @@ -87,8 +87,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, elif isinstance(value, ast.FormattedValue): print("Formatted value:", ast.dump(value)) # Assume int for now - fmt_parts.append("%lld") if isinstance(value.value, ast.Name): + fmt_parts.append("%lld") exprs.append(value.value) else: raise NotImplementedError( @@ -266,7 +266,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_ result = builder.call( fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False) - return result + return result, None def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -321,7 +321,7 @@ def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_ # Call the helper function result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False) - return result + return result, None def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None): @@ -338,7 +338,7 @@ def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local # Extract the lower 32 bits (PID) using 
bitwise AND with 0xFFFFFFFF mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF) pid = builder.and_(result, mask) - return pid + return pid, ir.IntType(64) def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -387,7 +387,7 @@ def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sy result = builder.call( fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False) - return result + return result, None else: raise NotImplementedError( "Only simple object names are supported as data in perf event output.") From ee03ac04d08f244c0ed2d8630dddb5b0c4a76ed3 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 01:02:10 +0530 Subject: [PATCH 3/8] Fix printk handler to comply with new symtab convention --- pythonbpf/bpf_helper_handler.py | 4 ++-- pythonbpf/codegen.py | 4 +++- pythonbpf/expr_pass.py | 2 +- pythonbpf/functions_pass.py | 9 ++++----- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pythonbpf/bpf_helper_handler.py b/pythonbpf/bpf_helper_handler.py index c6c6947..ae4f97b 100644 --- a/pythonbpf/bpf_helper_handler.py +++ b/pythonbpf/bpf_helper_handler.py @@ -121,7 +121,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, "Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.") for expr in exprs[:3]: - val = eval_expr(func, module, builder, expr, local_sym_tab, None) + val, _ = eval_expr(func, module, builder, + expr, local_sym_tab, None) if val: if isinstance(val.type, ir.PointerType): val = builder.ptrtoint(val, ir.IntType(64)) @@ -137,7 +138,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, print( "Warning: Failed to evaluate expression for bpf_printk argument. 
It will be converted to 0.") args.append(ir.Constant(ir.IntType(64), 0)) - fn_type = ir.FunctionType(ir.IntType( 64), [ir.PointerType(), ir.IntType(32)], var_arg=True) fn_ptr_type = ir.PointerType(fn_type) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index df30cde..52a8044 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -93,6 +93,7 @@ def compile_to_ir(filename: str, output: str): module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"]) + print(f"IR written to {output}") with open(output, "w") as f: f.write(f"source_filename = \"{filename}\"\n") f.write(str(module)) @@ -118,6 +119,7 @@ def compile(): print(f"Object written to {o_file}, {ll_file} can be removed") + def BPF() -> BpfProgram: caller_frame = inspect.stack()[1] caller_file = Path(caller_frame.filename).resolve() @@ -129,5 +131,5 @@ def BPF() -> BpfProgram: "llc", "-march=bpf", "-filetype=obj", "-O2", str(ll_file), "-o", str(o_file) ], check=True) - + return BpfProgram(str(o_file)) diff --git a/pythonbpf/expr_pass.py b/pythonbpf/expr_pass.py index 479d153..22641bf 100644 --- a/pythonbpf/expr_pass.py +++ b/pythonbpf/expr_pass.py @@ -3,7 +3,7 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None): - print(f"Evaluating expression: {expr}") + print(f"Evaluating expression: {ast.dump(expr)}") if isinstance(expr, ast.Name): if expr.id in local_sym_tab: var = local_sym_tab[expr.id][0] diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index d7188f2..4b3a17f 100644 --- a/pythonbpf/functions_pass.py +++ b/pythonbpf/functions_pass.py @@ -61,7 +61,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc if val is None: print("Failed to evaluate struct field assignment") return - builder.store(val, field_ptr) + builder.store(val[0], field_ptr) print(f"Assigned to struct field {var_name}.{field_name}") return elif isinstance(rval, ast.Constant): @@ -114,7 +114,7 
@@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc # var.align = 8 val = handle_helper_call( rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata) - builder.store(val, local_sym_tab[var_name][0]) + builder.store(val[0], local_sym_tab[var_name][0]) # local_sym_tab[var_name] = var print(f"Assigned constant {rval.func.id} to {var_name}") elif call_type == "deref" and len(rval.args) == 1: @@ -125,7 +125,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc print("Failed to evaluate deref argument") return print(f"Dereferenced value: {val}, storing in {var_name}") - builder.store(val, local_sym_tab[var_name][0]) + builder.store(val[0], local_sym_tab[var_name][0]) # local_sym_tab[var_name] = var print(f"Dereferenced and assigned to {var_name}") elif call_type in structs_sym_tab and len(rval.args) == 0: @@ -155,7 +155,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata) # var = builder.alloca(ir.IntType(64), name=var_name) # var.align = 8 - builder.store(val, local_sym_tab[var_name][0]) + builder.store(val[0], local_sym_tab[var_name][0]) # local_sym_tab[var_name] = var else: print("Unsupported assignment call structure") @@ -462,7 +462,6 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t process_func_body(module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab) - return func From da8a495da79e3c519e9e1762324846aef59b986e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 04:05:37 +0530 Subject: [PATCH 4/8] Fix handle_cond for new symtab convention --- pythonbpf/functions_pass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index 4b3a17f..28c1caf 100644 --- a/pythonbpf/functions_pass.py +++ 
b/pythonbpf/functions_pass.py @@ -196,12 +196,12 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab): return None elif isinstance(cond, ast.Compare): lhs = eval_expr(func, module, builder, cond.left, - local_sym_tab, map_sym_tab) + local_sym_tab, map_sym_tab)[0] if len(cond.ops) != 1 or len(cond.comparators) != 1: print("Unsupported complex comparison") return None rhs = eval_expr(func, module, builder, - cond.comparators[0], local_sym_tab, map_sym_tab) + cond.comparators[0], local_sym_tab, map_sym_tab)[0] op = cond.ops[0] if lhs.type != rhs.type: From 737c4d30391d91f882091b0aeda42792b2a73a80 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 04:17:29 +0530 Subject: [PATCH 5/8] Support storing and printing string type --- examples/execve5.py | 2 +- pythonbpf/bpf_helper_handler.py | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples/execve5.py b/examples/execve5.py index f716420..691f008 100644 --- a/examples/execve5.py +++ b/examples/execve5.py @@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32: process_id = pid() dataobj.pid = process_id dataobj.ts = ts - print(f"clone called at {ts} by pid {process_id}") + print(f"clone called at {ts} by pid {process_id}, str is {strobj}") events.output(dataobj) return c_int32(0) diff --git a/pythonbpf/bpf_helper_handler.py b/pythonbpf/bpf_helper_handler.py index ae4f97b..34b1f04 100644 --- a/pythonbpf/bpf_helper_handler.py +++ b/pythonbpf/bpf_helper_handler.py @@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, exprs = [] for value in call.args[0].values: + print("Value in f-string:", ast.dump(value)) if isinstance(value, ast.Constant): if isinstance(value.value, str): fmt_parts.append(value.value) @@ -86,10 +87,24 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, "Only string and integer constants are supported in f-string.") elif isinstance(value, 
ast.FormattedValue): print("Formatted value:", ast.dump(value)) - # Assume int for now + # TODO: Dirty handling here, only checks for int or str if isinstance(value.value, ast.Name): - fmt_parts.append("%lld") - exprs.append(value.value) + if local_sym_tab and value.value.id in local_sym_tab: + var_ptr, var_type = local_sym_tab[value.value.id] + if isinstance(var_type, ir.IntType): + fmt_parts.append("%lld") + exprs.append(value.value) + elif var_type == ir.PointerType(ir.IntType(8)): + # Case with string + fmt_parts.append("%s") + exprs.append(value.value) + else: + raise NotImplementedError( + "Only integer and pointer types are supported in formatted values.") + print("Formatted value variable:", var_ptr, var_type) + else: + raise ValueError( + f"Variable {value.value.id} not found in local symbol table.") else: raise NotImplementedError( "Only simple variable names are supported in formatted values.") From b9ddecd6b1a0e5c4d3b874285f53a18ede08cb46 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 04:44:38 +0530 Subject: [PATCH 6/8] Add string as a primitive to struct defs --- examples/execve5.py | 1 + pythonbpf/structs_pass.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/execve5.py b/examples/execve5.py index 691f008..3f890d0 100644 --- a/examples/execve5.py +++ b/examples/execve5.py @@ -10,6 +10,7 @@ class data_t: pid: c_uint64 ts: c_uint64 + comm: str(16) @bpf diff --git a/pythonbpf/structs_pass.py b/pythonbpf/structs_pass.py index fbe06ed..65b8fcc 100644 --- a/pythonbpf/structs_pass.py +++ b/pythonbpf/structs_pass.py @@ -28,14 +28,25 @@ def process_bpf_struct(cls_node, module): for item in cls_node.body: if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name): + print(f"Field: {item.target.id}, Type: " + f"{ast.dump(item.annotation)}") field_names.append(item.target.id) - field_types.append(ctypes_to_ir(item.annotation.id)) + if isinstance(item.annotation, ast.Call) and 
isinstance(item.annotation.func, ast.Name) and item.annotation.func.id == "str": + # This is a char array with fixed length + # TODO: For now assuming str is always called with constant + field_types.append(ir.ArrayType( + ir.IntType(8), item.annotation.args[0].value)) + else: + field_types.append(ctypes_to_ir(item.annotation.id)) curr_offset = 0 for ftype in field_types: if isinstance(ftype, ir.IntType): fsize = ftype.width // 8 alignment = fsize + elif isinstance(ftype, ir.ArrayType): + fsize = ftype.count * (ftype.element.width // 8) + alignment = ftype.element.width // 8 elif isinstance(ftype, ir.PointerType): fsize = 8 alignment = 8 From 4ebf0480dd45fa659cad9223b7f38e8ad9472c12 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 04:54:01 +0530 Subject: [PATCH 7/8] tweak commit to add placeholder string --- examples/execve5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/execve5.py b/examples/execve5.py index 3f890d0..4f1488f 100644 --- a/examples/execve5.py +++ b/examples/execve5.py @@ -22,13 +22,13 @@ def events() -> PerfEventArray: @bpf @section("tracepoint/syscalls/sys_enter_clone") def hello(ctx: c_void_p) -> c_int32: - strobj = "Hi" dataobj = data_t() ts = ktime() process_id = pid() + strobj = "hellohellohello" dataobj.pid = process_id dataobj.ts = ts - print(f"clone called at {ts} by pid {process_id}, str is {strobj}") + print(f"clone called at {ts} by pid {process_id}, comm {strobj}") events.output(dataobj) return c_int32(0) From 0f365be65e372c89bd41d7e66d2a5177126a4ef0 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 26 Sep 2025 18:26:07 +0530 Subject: [PATCH 8/8] Add some support for strings in structs --- examples/execve5.py | 1 + pythonbpf/functions_pass.py | 51 +++++++++++++++++++++++++++++++++++++ pythonbpf/structs_pass.py | 3 ++- 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/examples/execve5.py b/examples/execve5.py index 4f1488f..32063ff 100644 --- 
a/examples/execve5.py +++ b/examples/execve5.py @@ -28,6 +28,7 @@ def hello(ctx: c_void_p) -> c_int32: strobj = "hellohellohello" dataobj.pid = process_id dataobj.ts = ts + # dataobj.comm = strobj print(f"clone called at {ts} by pid {process_id}, comm {strobj}") events.output(dataobj) return c_int32(0) diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index 28c1caf..674617a 100644 --- a/pythonbpf/functions_pass.py +++ b/pythonbpf/functions_pass.py @@ -58,9 +58,17 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc inbounds=True) val = eval_expr(func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab) + if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)): + # TODO: Figure it out, not a priority rn + # Special case for string assignment to char array + #str_len = struct_info["field_types"][field_idx].count + #assign_string_to_array(builder, field_ptr, val[0], str_len) + #print(f"Assigned to struct field {var_name}.{field_name}") + pass if val is None: print("Failed to evaluate struct field assignment") return + print(field_ptr) builder.store(val[0], field_ptr) print(f"Assigned to struct field {var_name}.{field_name}") return @@ -537,3 +545,46 @@ def _expr_type(e): raise ValueError("Conflicting return types:" f"{found_type} vs {t}") return found_type or "None" + +# For string assignment to fixed-size arrays +def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length): + """ + Copy a string (i8*) to a fixed-size array ([N x i8]*) + """ + # Create a loop to copy characters one by one + entry_block = builder.block + copy_block = builder.append_basic_block("copy_char") + end_block = builder.append_basic_block("copy_end") + + # Create loop counter + i = builder.alloca(ir.IntType(32)) + builder.store(ir.Constant(ir.IntType(32), 0), i) + + # Start the loop + builder.branch(copy_block) + + # Copy loop + 
builder.position_at_end(copy_block) + idx = builder.load(i) + in_bounds = builder.icmp_unsigned('<', idx, ir.Constant(ir.IntType(32), array_length)) + builder.cbranch(in_bounds, copy_block, end_block) + + with builder.if_then(in_bounds): + # Load character from source + src_ptr = builder.gep(source_string_ptr, [idx]) + char = builder.load(src_ptr) + + # Store character in target + dst_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), idx]) + builder.store(char, dst_ptr) + + # Increment counter + next_idx = builder.add(idx, ir.Constant(ir.IntType(32), 1)) + builder.store(next_idx, i) + + builder.position_at_end(end_block) + + # Ensure null termination + last_idx = ir.Constant(ir.IntType(32), array_length - 1) + null_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), last_idx]) + builder.store(ir.Constant(ir.IntType(8), 0), null_ptr) diff --git a/pythonbpf/structs_pass.py b/pythonbpf/structs_pass.py index 65b8fcc..19ad3c9 100644 --- a/pythonbpf/structs_pass.py +++ b/pythonbpf/structs_pass.py @@ -63,6 +63,7 @@ def process_bpf_struct(cls_node, module): structs_sym_tab[struct_name] = { "type": struct_type, "fields": {name: idx for idx, name in enumerate(field_names)}, - "size": total_size + "size": total_size, + "field_types": field_types, } print(f"Created struct {struct_name} with fields {field_names}")