From 6008d9841f038924c2cdd72d717c565f6e1f82c5 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 8 Oct 2025 22:45:09 +0530 Subject: [PATCH 01/43] Change loglevel of multi-assignment warning in handle_assign --- pythonbpf/functions/functions_pass.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 7fc3febc..0da41fda 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -48,8 +48,11 @@ def handle_assign( func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab ): """Handle assignment statements in the function body.""" + + # TODO: Support this later + # GH #37 if len(stmt.targets) != 1: - logger.info("Unsupported multiassignment") + logger.error("Multi-target assignment is not supported for now") return num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64") From 84ed27f222aad9268c65c937c6c5e3b421ea7e67 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 8 Oct 2025 22:55:03 +0530 Subject: [PATCH 02/43] Add handle_variable_assignment stub and boilerplate in handle_assign --- pythonbpf/assign_pass.py | 4 ++++ pythonbpf/functions/functions_pass.py | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 pythonbpf/assign_pass.py diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py new file mode 100644 index 00000000..ee8edb51 --- /dev/null +++ b/pythonbpf/assign_pass.py @@ -0,0 +1,4 @@ +def handle_variable_assignment( + func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab +): + pass diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 0da41fda..d29de2f4 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -8,6 +8,7 @@ from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.binary_ops import handle_binary_op 
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool +from pythonbpf.assign_pass import handle_variable_assignment from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name @@ -55,9 +56,28 @@ def handle_assign( logger.error("Multi-target assignment is not supported for now") return + target = stmt.targets[0] + rval = stmt.value + + if isinstance(target, ast.Name): + # NOTE: Simple variable assignment case: x = 5 + var_name = target.id + result = handle_variable_assignment( + func, + module, + builder, + var_name, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + if not result: + logger.error(f"Failed to handle assignment to {var_name}") + return + num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64") - target = stmt.targets[0] logger.info(f"Handling assignment to {ast.dump(target)}") if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute): logger.info("Unsupported assignment target") From d7bfe86524b3fc0c1325e70ff0a5f10ee2398b2d Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 9 Oct 2025 03:09:10 +0530 Subject: [PATCH 03/43] Add handle_variable_assignment to assign_pass --- pythonbpf/assign_pass.py | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index ee8edb51..32753ff0 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,4 +1,57 @@ +import ast +import logging +from llvmlite import ir +from pythonbpf.expr import eval_expr + +logger = logging.getLogger(__name__) + + def handle_variable_assignment( func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab ): - pass + """Handle single named variable assignment.""" + + if var_name not in local_sym_tab: + logger.error(f"Variable {var_name} not declared.") + return False + + var_ptr = local_sym_tab[var_name].var + var_type = local_sym_tab[var_name].ir_type + + # NOTE: Special 
case for struct initialization + if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name): + struct_name = rval.func.id + if struct_name in structs_sym_tab and len(rval.args) == 0: + struct_info = structs_sym_tab[struct_name] + ir_struct = struct_info.ir_type + + builder.store(ir.Constant(ir_struct, None), var_ptr) + logger.info(f"Initialized struct {struct_name} for variable {var_name}") + return True + + val_result = eval_expr( + func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab + ) + if val_result is None: + logger.error(f"Failed to evaluate value for {var_name}") + return False + + val, val_type = val_result + if val_type != var_type: + if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType): + # Allow implicit int widening + if val_type.width < var_type.width: + val = builder.sext(val, var_type) + logger.info(f"Implicitly widened int for variable {var_name}") + elif val_type.width > var_type.width: + val = builder.trunc(val, var_type) + logger.info(f"Implicitly truncated int for variable {var_name}") + else: + logger.error( + f"Type mismatch for variable {var_name}: {val_type} vs {var_type}" + ) + return False + + builder.store(val, var_ptr) + logger.info(f"Assigned value to variable {var_name}") + return True From 054a834464e3e7c6e940d244ecc552f7290cd63a Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 9 Oct 2025 03:28:07 +0530 Subject: [PATCH 04/43] Add failing assign test retype.py, with explanation --- tests/failing_tests/assign/retype.py | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/failing_tests/assign/retype.py diff --git a/tests/failing_tests/assign/retype.py b/tests/failing_tests/assign/retype.py new file mode 100644 index 00000000..b4fc04ed --- /dev/null +++ b/tests/failing_tests/assign/retype.py @@ -0,0 +1,39 @@ +from pythonbpf import bpf, map, section, bpfglobal, compile +from ctypes import c_void_p, c_int64, c_uint64 +from pythonbpf.maps 
import HashMap + + +# NOTE: This example tries to reinterpret the variable `x` to a different type. +# We do not allow this for now, as stack allocations are typed and have to be +# done in the first basic block. Allowing re-interpretation would require +# re-allocation of stack space (possibly in a new basic block), which is not +# supported in eBPF yet. +# We can allow bitcasts in cases where the width of the types is the same in +# the future. But for now, we do not allow any re-interpretation of variables. + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=3) + + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + last.update(0, 1) + x = last.lookup(0) + x = 20 + if x == 2: + print("Hello, World!") + else: + print("Goodbye, World!") + return + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From c596213b2a3c4a520922da12421d9e284e574c65 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 9 Oct 2025 03:42:25 +0530 Subject: [PATCH 05/43] Add cst_var_binop.py as passing assign test --- tests/passing_tests/assign/cst_var_binop.py | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/passing_tests/assign/cst_var_binop.py diff --git a/tests/passing_tests/assign/cst_var_binop.py b/tests/passing_tests/assign/cst_var_binop.py new file mode 100644 index 00000000..957e6783 --- /dev/null +++ b/tests/passing_tests/assign/cst_var_binop.py @@ -0,0 +1,27 @@ +from pythonbpf import bpf, section, bpfglobal, compile +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + x = 1 + print(f"Initial x: {x}") + a = 20 + x = a + print(f"Updated x with a: {x}") + x = (x + x) * 3 + if x == 2: + print("Hello, World!") + else: + print(f"Goodbye, World! 
{x}") + return + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From 23afb0bd3343ef948dbf1c156a7fbdc13b91fdde Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 9 Oct 2025 21:47:28 +0530 Subject: [PATCH 06/43] Add deref_to_val to deref into final value and return the chain as well in binops --- pythonbpf/binary_ops.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index c0ca0ec0..cca0b482 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -19,6 +19,24 @@ def recursive_dereferencer(var, builder): raise TypeError(f"Unsupported type for dereferencing: {var.type}") +def deref_to_val(var, builder): + """Dereference a variable to get its value and pointer chain.""" + logger.info(f"Dereferencing {var}, type is {var.type}") + + chain = [var] + cur = var + + while isinstance(cur.type, ir.PointerType): + cur = builder.load(cur) + chain.append(cur) + + if isinstance(cur.type, ir.IntType): + logger.info(f"dereference chain: {chain}") + return cur, chain + else: + raise TypeError(f"Unsupported type for dereferencing: {cur.type}") + + def get_operand_value(operand, builder, local_sym_tab): """Extract the value from an operand, handling variables and constants.""" if isinstance(operand, ast.Name): From 1253f51ff358a172e7597eabd7a72eab7ff8730e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 9 Oct 2025 23:11:06 +0530 Subject: [PATCH 07/43] Use deref_to_val instead of recursive_dereferencer in get_operand value --- pythonbpf/binary_ops.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index cca0b482..690a4e33 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -41,14 +41,18 @@ def get_operand_value(operand, builder, local_sym_tab): """Extract the value from an operand, handling variables and constants.""" if isinstance(operand, ast.Name): if 
operand.id in local_sym_tab: - return recursive_dereferencer(local_sym_tab[operand.id].var, builder) + var = local_sym_tab[operand.id].var + val, chain = deref_to_val(var, builder) + return val, chain, var raise ValueError(f"Undefined variable: {operand.id}") elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): - return ir.Constant(ir.IntType(64), operand.value) + cst = ir.Constant(ir.IntType(64), operand.value) + return cst, [cst], None raise TypeError(f"Unsupported constant type: {type(operand.value)}") elif isinstance(operand, ast.BinOp): - return handle_binary_op_impl(operand, builder, local_sym_tab) + res = handle_binary_op_impl(operand, builder, local_sym_tab) + return res, [res], None raise TypeError(f"Unsupported operand type: {type(operand)}") From 8bab07ed72f0c89ad07050cdb9deaba2740db2c0 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 00:13:35 +0530 Subject: [PATCH 08/43] Remove recursive_dereferencer --- pythonbpf/binary_ops.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 690a4e33..ccd51f86 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -6,19 +6,6 @@ logger: Logger = logging.getLogger(__name__) -def recursive_dereferencer(var, builder): - """dereference until primitive type comes out""" - # TODO: Not worrying about stack overflow for now - logger.info(f"Dereferencing {var}, type is {var.type}") - if isinstance(var.type, ir.PointerType): - a = builder.load(var) - return recursive_dereferencer(a, builder) - elif isinstance(var.type, ir.IntType): - return var - else: - raise TypeError(f"Unsupported type for dereferencing: {var.type}") - - def deref_to_val(var, builder): """Dereference a variable to get its value and pointer chain.""" logger.info(f"Dereferencing {var}, type is {var.type}") @@ -58,8 +45,8 @@ def get_operand_value(operand, builder, local_sym_tab): def handle_binary_op_impl(rval, 
builder, local_sym_tab): op = rval.op - left = get_operand_value(rval.left, builder, local_sym_tab) - right = get_operand_value(rval.right, builder, local_sym_tab) + left, _, _ = get_operand_value(rval.left, builder, local_sym_tab) + right, _, _ = get_operand_value(rval.right, builder, local_sym_tab) logger.info(f"left is {left}, right is {right}, op is {op}") # Map AST operation nodes to LLVM IR builder methods From 489244a015cf7ba2230c0f06bb3f3e768a3eefd9 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 02:56:11 +0530 Subject: [PATCH 09/43] Add store_through_chain --- pythonbpf/binary_ops.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index ccd51f86..7fa89697 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -43,6 +43,16 @@ def get_operand_value(operand, builder, local_sym_tab): raise TypeError(f"Unsupported operand type: {type(operand)}") +def store_through_chain(value, chain, builder): + """Store a value through a pointer chain.""" + if not chain or len(chain) < 2: + raise ValueError("Pointer chain must have at least two elements") + + for ptr in reversed(chain[1:]): + builder.store(value, ptr) + value = ptr + + def handle_binary_op_impl(rval, builder, local_sym_tab): op = rval.op left, _, _ = get_operand_value(rval.left, builder, local_sym_tab) From 047f361ea91024877ebc101d6a48eabcf0e5b855 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 06:09:46 +0530 Subject: [PATCH 10/43] Allocate twice for map lookups --- pythonbpf/binary_ops.py | 6 ++++-- pythonbpf/functions/functions_pass.py | 12 +++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 7fa89697..77c184fb 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -55,10 +55,12 @@ def store_through_chain(value, chain, builder): def handle_binary_op_impl(rval, builder, local_sym_tab): 
op = rval.op - left, _, _ = get_operand_value(rval.left, builder, local_sym_tab) - right, _, _ = get_operand_value(rval.right, builder, local_sym_tab) + left, lchain, _ = get_operand_value(rval.left, builder, local_sym_tab) + right, rchain, _ = get_operand_value(rval.right, builder, local_sym_tab) logger.info(f"left is {left}, right is {right}, op is {op}") + logger.info(f"left chain: {lchain}, right chain: {rchain}") + # Map AST operation nodes to LLVM IR builder methods op_map = { ast.Add: builder.add, diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index d29de2f4..6c2de1a8 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -455,10 +455,17 @@ def allocate_mem( f"Pre-allocated variable {var_name} for struct {call_type}" ) elif isinstance(rval.func, ast.Attribute): + # Map method call ir_type = ir.PointerType(ir.IntType(64)) var = builder.alloca(ir_type, name=var_name) + + # declare an intermediate ptr type for map lookup + ir_type = ir.IntType(64) + var_tmp = builder.alloca(ir_type, name=f"{var_name}_tmp") # var.align = ir_type.width // 8 - logger.info(f"Pre-allocated variable {var_name} for map") + logger.info( + f"Pre-allocated variable {var_name} and {var_name}_tmp for map" + ) else: logger.info("Unsupported assignment call function type") continue @@ -496,6 +503,9 @@ def allocate_mem( local_sym_tab[var_name] = LocalSymbol(var, ir_type, call_type) else: local_sym_tab[var_name] = LocalSymbol(var, ir_type) + + if var_tmp: + local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, ir_type) return local_sym_tab From 1d517d4e09abedeb5c90b56d32ebb444550275f9 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 12:28:45 +0530 Subject: [PATCH 11/43] Add double_alloc in alloc_mem --- pythonbpf/functions/functions_pass.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pythonbpf/functions/functions_pass.py 
b/pythonbpf/functions/functions_pass.py index 6c2de1a8..a1414ea5 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -386,6 +386,7 @@ def process_stmt( def allocate_mem( module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): + double_alloc = False for stmt in body: has_metadata = False if isinstance(stmt, ast.If): @@ -462,6 +463,7 @@ def allocate_mem( # declare an intermediate ptr type for map lookup ir_type = ir.IntType(64) var_tmp = builder.alloca(ir_type, name=f"{var_name}_tmp") + double_alloc = True # var.align = ir_type.width // 8 logger.info( f"Pre-allocated variable {var_name} and {var_name}_tmp for map" @@ -504,7 +506,7 @@ def allocate_mem( else: local_sym_tab[var_name] = LocalSymbol(var, ir_type) - if var_tmp: + if double_alloc: local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, ir_type) return local_sym_tab From 99aacca94b88a2520bd8c774320f199c2ace8dc8 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 13:48:40 +0530 Subject: [PATCH 12/43] WIP: allow pointer assignments to var --- pythonbpf/assign_pass.py | 19 ++++++++++++++++++- pythonbpf/expr/__init__.py | 4 ++-- pythonbpf/expr/type_normalization.py | 6 +++--- pythonbpf/functions/functions_pass.py | 6 +++--- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 32753ff0..93c8003a 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,7 +1,7 @@ import ast import logging from llvmlite import ir -from pythonbpf.expr import eval_expr +from pythonbpf.expr import eval_expr, get_base_type_and_depth logger = logging.getLogger(__name__) @@ -38,6 +38,9 @@ def handle_variable_assignment( val, val_type = val_result if val_type != var_type: + logger.info(f"val = {val}") + logger.info(f"var = {var_ptr}") + logger.info(f"truthy {var_type}") if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType): # Allow 
implicit int widening if val_type.width < var_type.width: @@ -46,10 +49,24 @@ def handle_variable_assignment( elif val_type.width > var_type.width: val = builder.trunc(val, var_type) logger.info(f"Implicitly truncated int for variable {var_name}") + elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.PointerType): + ptr_target, ptr_depth = get_base_type_and_depth(var_type) + if ptr_target.width > val_type.width: + val = builder.sext(val, ptr_target) + elif ptr_target.width < val_type.width: + val = builder.trunc(val, ptr_target) + + if ptr_depth > 1: + # NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL + var_ptr_tmp = local_sym_tab[f"{var_name}_tmp"].var + builder.store(val, var_ptr_tmp) + val = var_ptr_tmp else: logger.error( f"Type mismatch for variable {var_name}: {val_type} vs {var_type}" ) + logger.error(f"var_type: {isinstance(var_type, ir.PointerType)}") + logger.error(f"val_type: {isinstance(val_type, ir.IntType)}") return False builder.store(val, var_ptr) diff --git a/pythonbpf/expr/__init__.py b/pythonbpf/expr/__init__.py index d58c543a..577ee3cc 100644 --- a/pythonbpf/expr/__init__.py +++ b/pythonbpf/expr/__init__.py @@ -1,4 +1,4 @@ from .expr_pass import eval_expr, handle_expr -from .type_normalization import convert_to_bool +from .type_normalization import convert_to_bool, get_base_type_and_depth -__all__ = ["eval_expr", "handle_expr", "convert_to_bool"] +__all__ = ["eval_expr", "handle_expr", "convert_to_bool", "get_base_type_and_depth"] diff --git a/pythonbpf/expr/type_normalization.py b/pythonbpf/expr/type_normalization.py index 7a2fb574..34b4fb7c 100644 --- a/pythonbpf/expr/type_normalization.py +++ b/pythonbpf/expr/type_normalization.py @@ -16,7 +16,7 @@ } -def _get_base_type_and_depth(ir_type): +def get_base_type_and_depth(ir_type): """Get the base type for pointer types.""" cur_type = ir_type depth = 0 @@ -88,8 +88,8 @@ def _normalize_types(func, builder, lhs, rhs): logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}") 
return None, None else: - lhs_base, lhs_depth = _get_base_type_and_depth(lhs.type) - rhs_base, rhs_depth = _get_base_type_and_depth(rhs.type) + lhs_base, lhs_depth = get_base_type_and_depth(lhs.type) + rhs_base, rhs_depth = get_base_type_and_depth(rhs.type) if lhs_base == rhs_base: if lhs_depth < rhs_depth: rhs = _deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index a1414ea5..41d40481 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -461,8 +461,8 @@ def allocate_mem( var = builder.alloca(ir_type, name=var_name) # declare an intermediate ptr type for map lookup - ir_type = ir.IntType(64) - var_tmp = builder.alloca(ir_type, name=f"{var_name}_tmp") + tmp_ir_type = ir.IntType(64) + var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp") double_alloc = True # var.align = ir_type.width // 8 logger.info( @@ -507,7 +507,7 @@ def allocate_mem( local_sym_tab[var_name] = LocalSymbol(var, ir_type) if double_alloc: - local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, ir_type) + local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) return local_sym_tab From 9febadffd34f9bd183fae41421c0c6a4db38fa2a Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 15:01:15 +0530 Subject: [PATCH 13/43] Add pointer handling to helper_utils, finish pointer assignment --- pythonbpf/expr/__init__.py | 10 +++++-- pythonbpf/expr/expr_pass.py | 2 +- pythonbpf/expr/type_normalization.py | 6 ++-- pythonbpf/helper/helper_utils.py | 42 ++++++++++++++++++++++++---- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/pythonbpf/expr/__init__.py b/pythonbpf/expr/__init__.py index 577ee3cc..dd5b4802 100644 --- a/pythonbpf/expr/__init__.py +++ b/pythonbpf/expr/__init__.py @@ -1,4 +1,10 @@ from .expr_pass import eval_expr, handle_expr -from .type_normalization import convert_to_bool, 
get_base_type_and_depth +from .type_normalization import convert_to_bool, get_base_type_and_depth, deref_to_depth -__all__ = ["eval_expr", "handle_expr", "convert_to_bool", "get_base_type_and_depth"] +__all__ = [ + "eval_expr", + "handle_expr", + "convert_to_bool", + "get_base_type_and_depth", + "deref_to_depth", +] diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 21be1961..d4fc9407 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -26,7 +26,7 @@ def _handle_constant_expr(expr: ast.Constant): if isinstance(expr.value, int) or isinstance(expr.value, bool): return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64) else: - logger.error("Unsupported constant type") + logger.error(f"Unsupported constant type {ast.dump(expr)}") return None diff --git a/pythonbpf/expr/type_normalization.py b/pythonbpf/expr/type_normalization.py index 34b4fb7c..fec53a41 100644 --- a/pythonbpf/expr/type_normalization.py +++ b/pythonbpf/expr/type_normalization.py @@ -26,7 +26,7 @@ def get_base_type_and_depth(ir_type): return cur_type, depth -def _deref_to_depth(func, builder, val, target_depth): +def deref_to_depth(func, builder, val, target_depth): """Dereference a pointer to a certain depth.""" cur_val = val @@ -92,9 +92,9 @@ def _normalize_types(func, builder, lhs, rhs): rhs_base, rhs_depth = get_base_type_and_depth(rhs.type) if lhs_base == rhs_base: if lhs_depth < rhs_depth: - rhs = _deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth) + rhs = deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth) elif rhs_depth < lhs_depth: - lhs = _deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth) + lhs = deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth) return _normalize_types(func, builder, lhs, rhs) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 68ab52cd..7b46a40b 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -3,7 +3,7 @@ from 
collections.abc import Callable from llvmlite import ir -from pythonbpf.expr import eval_expr +from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth logger = logging.getLogger(__name__) @@ -224,10 +224,27 @@ def _populate_fval(ftype, node, fmt_parts, exprs): raise NotImplementedError( f"Unsupported integer width in f-string: {ftype.width}" ) - elif ftype == ir.PointerType(ir.IntType(8)): - # NOTE: We assume i8* is a string - fmt_parts.append("%s") - exprs.append(node) + elif isinstance(ftype, ir.PointerType): + target, depth = get_base_type_and_depth(ftype) + if isinstance(target, ir.IntType): + if target.width == 64: + fmt_parts.append("%lld") + exprs.append(node) + elif target.width == 32: + fmt_parts.append("%d") + exprs.append(node) + elif target.width == 8 and depth == 1: + # NOTE: Assume i8* is a string + fmt_parts.append("%s") + exprs.append(node) + else: + raise NotImplementedError( + f"Unsupported pointer target type in f-string: {target}" + ) + else: + raise NotImplementedError( + f"Unsupported pointer target type in f-string: {target}" + ) else: raise NotImplementedError(f"Unsupported field type in f-string: {ftype}") @@ -264,7 +281,20 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta if val: if isinstance(val.type, ir.PointerType): - val = builder.ptrtoint(val, ir.IntType(64)) + target, depth = get_base_type_and_depth(val.type) + if isinstance(target, ir.IntType): + if target.width >= 32: + val = deref_to_depth(func, builder, val, depth) + val = builder.sext(val, ir.IntType(64)) + elif target.width == 8 and depth == 1: + # NOTE: i8* is string, no need to deref + pass + + else: + logger.warning( + "Only int and ptr supported in bpf_printk args. Others default to 0." 
+ ) + val = ir.Constant(ir.IntType(64), 0) elif isinstance(val.type, ir.IntType): if val.type.width < 64: val = builder.sext(val, ir.IntType(64)) From 7529820c0b5cfa120c05cb3a970f08a7d9a01179 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 20:36:37 +0530 Subject: [PATCH 14/43] Allow int** pointers to store binops of type int** op int --- pythonbpf/assign_pass.py | 26 ++++++++--------------- pythonbpf/binary_ops.py | 41 +++++++++++++------------------------ pythonbpf/expr/expr_pass.py | 2 +- 3 files changed, 24 insertions(+), 45 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 93c8003a..969870d0 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,7 +1,7 @@ import ast import logging from llvmlite import ir -from pythonbpf.expr import eval_expr, get_base_type_and_depth +from pythonbpf.expr import eval_expr logger = logging.getLogger(__name__) @@ -37,10 +37,8 @@ def handle_variable_assignment( return False val, val_type = val_result + logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}") if val_type != var_type: - logger.info(f"val = {val}") - logger.info(f"var = {var_ptr}") - logger.info(f"truthy {var_type}") if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType): # Allow implicit int widening if val_type.width < var_type.width: @@ -50,23 +48,17 @@ def handle_variable_assignment( val = builder.trunc(val, var_type) logger.info(f"Implicitly truncated int for variable {var_name}") elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.PointerType): - ptr_target, ptr_depth = get_base_type_and_depth(var_type) - if ptr_target.width > val_type.width: - val = builder.sext(val, ptr_target) - elif ptr_target.width < val_type.width: - val = builder.trunc(val, ptr_target) - - if ptr_depth > 1: - # NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL - var_ptr_tmp = local_sym_tab[f"{var_name}_tmp"].var - builder.store(val, var_ptr_tmp) 
- val = var_ptr_tmp + # NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL + logger.info( + f"Creating temporary variable for pointer assignment to {var_name}" + ) + var_ptr_tmp = local_sym_tab[f"{var_name}_tmp"].var + builder.store(val, var_ptr_tmp) + val = var_ptr_tmp else: logger.error( f"Type mismatch for variable {var_name}: {val_type} vs {var_type}" ) - logger.error(f"var_type: {isinstance(var_type, ir.PointerType)}") - logger.error(f"val_type: {isinstance(val_type, ir.IntType)}") return False builder.store(val, var_ptr) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 77c184fb..40417f87 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -3,34 +3,21 @@ from logging import Logger import logging -logger: Logger = logging.getLogger(__name__) - - -def deref_to_val(var, builder): - """Dereference a variable to get its value and pointer chain.""" - logger.info(f"Dereferencing {var}, type is {var.type}") - - chain = [var] - cur = var +from pythonbpf.expr import get_base_type_and_depth, deref_to_depth - while isinstance(cur.type, ir.PointerType): - cur = builder.load(cur) - chain.append(cur) - - if isinstance(cur.type, ir.IntType): - logger.info(f"dereference chain: {chain}") - return cur, chain - else: - raise TypeError(f"Unsupported type for dereferencing: {cur.type}") +logger: Logger = logging.getLogger(__name__) -def get_operand_value(operand, builder, local_sym_tab): +def get_operand_value(func, operand, builder, local_sym_tab): """Extract the value from an operand, handling variables and constants.""" if isinstance(operand, ast.Name): if operand.id in local_sym_tab: var = local_sym_tab[operand.id].var - val, chain = deref_to_val(var, builder) - return val, chain, var + var_type = var.type + base_type, depth = get_base_type_and_depth(var_type) + logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") + val = deref_to_depth(func, builder, var, depth) + return val, [val], var raise ValueError(f"Undefined 
variable: {operand.id}") elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): @@ -38,7 +25,7 @@ def get_operand_value(operand, builder, local_sym_tab): return cst, [cst], None raise TypeError(f"Unsupported constant type: {type(operand.value)}") elif isinstance(operand, ast.BinOp): - res = handle_binary_op_impl(operand, builder, local_sym_tab) + res = handle_binary_op_impl(func, operand, builder, local_sym_tab) return res, [res], None raise TypeError(f"Unsupported operand type: {type(operand)}") @@ -53,10 +40,10 @@ def store_through_chain(value, chain, builder): value = ptr -def handle_binary_op_impl(rval, builder, local_sym_tab): +def handle_binary_op_impl(func, rval, builder, local_sym_tab): op = rval.op - left, lchain, _ = get_operand_value(rval.left, builder, local_sym_tab) - right, rchain, _ = get_operand_value(rval.right, builder, local_sym_tab) + left, lchain, _ = get_operand_value(func, rval.left, builder, local_sym_tab) + right, rchain, _ = get_operand_value(func, rval.right, builder, local_sym_tab) logger.info(f"left is {left}, right is {right}, op is {op}") logger.info(f"left chain: {lchain}, right chain: {rchain}") @@ -83,8 +70,8 @@ def handle_binary_op_impl(rval, builder, local_sym_tab): raise SyntaxError("Unsupported binary operation") -def handle_binary_op(rval, builder, var_name, local_sym_tab): - result = handle_binary_op_impl(rval, builder, local_sym_tab) +def handle_binary_op(func, rval, builder, var_name, local_sym_tab): + result = handle_binary_op_impl(func, rval, builder, local_sym_tab) if var_name and var_name in local_sym_tab: logger.info( f"Storing result {result} into variable {local_sym_tab[var_name].var}" diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index d4fc9407..8a5b6089 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -402,7 +402,7 @@ def eval_expr( elif isinstance(expr, ast.BinOp): from pythonbpf.binary_ops import handle_binary_op - return 
handle_binary_op(expr, builder, None, local_sym_tab) + return handle_binary_op(func, expr, builder, None, local_sym_tab) elif isinstance(expr, ast.Compare): return _handle_compare( func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab From a756f5e4b71e07aadb0f16168c811d8c12edda20 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 10 Oct 2025 23:55:12 +0530 Subject: [PATCH 15/43] Add passing helper test for assignment --- tests/passing_tests/assign/helper.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/passing_tests/assign/helper.py diff --git a/tests/passing_tests/assign/helper.py b/tests/passing_tests/assign/helper.py new file mode 100644 index 00000000..9809a9c7 --- /dev/null +++ b/tests/passing_tests/assign/helper.py @@ -0,0 +1,34 @@ +from pythonbpf import bpf, map, section, bpfglobal, compile +from ctypes import c_void_p, c_int64, c_uint64 +from pythonbpf.maps import HashMap + +# NOTE: An example of i64** assignment with binops on the RHS + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=3) + + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + last.update(0, 1) + x = last.lookup(0) + print(f"{x}") + x = x + 1 + if x == 2: + print("Hello, World!") + else: + print("Goodbye, World!") + return + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From 317575644f9c5774ba7b633f82b0e47aff3a8dfe Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 00:18:11 +0530 Subject: [PATCH 16/43] Interpret bools as ints in binops --- pythonbpf/binary_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 40417f87..0380ebd0 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -21,7 +21,7 @@ def get_operand_value(func, operand, builder, local_sym_tab): raise 
ValueError(f"Undefined variable: {operand.id}") elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): - cst = ir.Constant(ir.IntType(64), operand.value) + cst = ir.Constant(ir.IntType(64), int(operand.value)) return cst, [cst], None raise TypeError(f"Unsupported constant type: {type(operand.value)}") elif isinstance(operand, ast.BinOp): From cac88d15609196d95b04be48f31f94089f3f122e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 02:44:08 +0530 Subject: [PATCH 17/43] Allow different int widths in binops --- pythonbpf/binary_ops.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 0380ebd0..a277182f 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -3,13 +3,14 @@ from logging import Logger import logging -from pythonbpf.expr import get_base_type_and_depth, deref_to_depth +from pythonbpf.expr import get_base_type_and_depth, deref_to_depth, eval_expr logger: Logger = logging.getLogger(__name__) def get_operand_value(func, operand, builder, local_sym_tab): """Extract the value from an operand, handling variables and constants.""" + logger.info(f"Getting operand value for: {ast.dump(operand)}") if isinstance(operand, ast.Name): if operand.id in local_sym_tab: var = local_sym_tab[operand.id].var @@ -27,6 +28,12 @@ def get_operand_value(func, operand, builder, local_sym_tab): elif isinstance(operand, ast.BinOp): res = handle_binary_op_impl(func, operand, builder, local_sym_tab) return res, [res], None + elif isinstance(operand, ast.Call): + res = eval_expr(func, None, builder, operand, local_sym_tab, {}, {}) + if res is None: + raise ValueError(f"Failed to evaluate call expression: {operand}") + val, val_type = res + return val, [val], None raise TypeError(f"Unsupported operand type: {type(operand)}") @@ -48,6 +55,14 @@ def handle_binary_op_impl(func, rval, builder, local_sym_tab): logger.info(f"left chain: {lchain}, 
right chain: {rchain}") + # NOTE: Before doing the operation, if the operands are integers + # we always extend them to i64. The assignment to LHS will take + # care of truncation if needed. + if isinstance(left.type, ir.IntType) and left.type.width < 64: + left = builder.sext(left, ir.IntType(64)) + if isinstance(right.type, ir.IntType) and right.type.width < 64: + right = builder.sext(right, ir.IntType(64)) + # Map AST operation nodes to LLVM IR builder methods op_map = { ast.Add: builder.add, From c2c17741e59a7d67ee876db38c88c700f73c6e5a Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 03:04:26 +0530 Subject: [PATCH 18/43] Remove store_through_chain --- pythonbpf/binary_ops.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index a277182f..0fbb0c10 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -37,16 +37,6 @@ def get_operand_value(func, operand, builder, local_sym_tab): raise TypeError(f"Unsupported operand type: {type(operand)}") -def store_through_chain(value, chain, builder): - """Store a value through a pointer chain.""" - if not chain or len(chain) < 2: - raise ValueError("Pointer chain must have at least two elements") - - for ptr in reversed(chain[1:]): - builder.store(value, ptr) - value = ptr - - def handle_binary_op_impl(func, rval, builder, local_sym_tab): op = rval.op left, lchain, _ = get_operand_value(func, rval.left, builder, local_sym_tab) From 91a3fe140df3df7220f1877444250f69cbf62b07 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 03:06:24 +0530 Subject: [PATCH 19/43] Remove unnecessary return artifacts from get_operand_value --- pythonbpf/binary_ops.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index 0fbb0c10..e8dd32b3 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -18,33 +18,31 @@ def 
get_operand_value(func, operand, builder, local_sym_tab): base_type, depth = get_base_type_and_depth(var_type) logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") val = deref_to_depth(func, builder, var, depth) - return val, [val], var + return val raise ValueError(f"Undefined variable: {operand.id}") elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): cst = ir.Constant(ir.IntType(64), int(operand.value)) - return cst, [cst], None + return cst raise TypeError(f"Unsupported constant type: {type(operand.value)}") elif isinstance(operand, ast.BinOp): res = handle_binary_op_impl(func, operand, builder, local_sym_tab) - return res, [res], None + return res elif isinstance(operand, ast.Call): res = eval_expr(func, None, builder, operand, local_sym_tab, {}, {}) if res is None: raise ValueError(f"Failed to evaluate call expression: {operand}") - val, val_type = res - return val, [val], None + val, _ = res + return val raise TypeError(f"Unsupported operand type: {type(operand)}") def handle_binary_op_impl(func, rval, builder, local_sym_tab): op = rval.op - left, lchain, _ = get_operand_value(func, rval.left, builder, local_sym_tab) - right, rchain, _ = get_operand_value(func, rval.right, builder, local_sym_tab) + left = get_operand_value(func, rval.left, builder, local_sym_tab) + right = get_operand_value(func, rval.right, builder, local_sym_tab) logger.info(f"left is {left}, right is {right}, op is {op}") - logger.info(f"left chain: {lchain}, right chain: {rchain}") - # NOTE: Before doing the operation, if the operands are integers # we always extend them to i64. The assignment to LHS will take # care of truncation if needed. 
From c9bbe1ffd87de30e28a3b1433659d0e85f7c433a Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 03:21:09 +0530 Subject: [PATCH 20/43] Call eval_expr properly within get_operand_value --- pythonbpf/binary_ops.py | 41 +++++++++++++++++++++++++++++-------- pythonbpf/expr/expr_pass.py | 11 +++++++++- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index e8dd32b3..a5b8dbeb 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -8,7 +8,9 @@ logger: Logger = logging.getLogger(__name__) -def get_operand_value(func, operand, builder, local_sym_tab): +def get_operand_value( + func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None +): """Extract the value from an operand, handling variables and constants.""" logger.info(f"Getting operand value for: {ast.dump(operand)}") if isinstance(operand, ast.Name): @@ -26,10 +28,14 @@ def get_operand_value(func, operand, builder, local_sym_tab): return cst raise TypeError(f"Unsupported constant type: {type(operand.value)}") elif isinstance(operand, ast.BinOp): - res = handle_binary_op_impl(func, operand, builder, local_sym_tab) + res = handle_binary_op_impl( + func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab + ) return res - elif isinstance(operand, ast.Call): - res = eval_expr(func, None, builder, operand, local_sym_tab, {}, {}) + else: + res = eval_expr( + func, module, builder, operand, local_sym_tab, map_sym_tab, structs_sym_tab + ) if res is None: raise ValueError(f"Failed to evaluate call expression: {operand}") val, _ = res @@ -37,10 +43,16 @@ def get_operand_value(func, operand, builder, local_sym_tab): raise TypeError(f"Unsupported operand type: {type(operand)}") -def handle_binary_op_impl(func, rval, builder, local_sym_tab): +def handle_binary_op_impl( + func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None +): op = rval.op - left = 
get_operand_value(func, rval.left, builder, local_sym_tab) - right = get_operand_value(func, rval.right, builder, local_sym_tab) + left = get_operand_value( + func, module, rval.left, builder, local_sym_tab, map_sym_tab, structs_sym_tab + ) + right = get_operand_value( + func, module, rval.right, builder, local_sym_tab, map_sym_tab, structs_sym_tab + ) logger.info(f"left is {left}, right is {right}, op is {op}") # NOTE: Before doing the operation, if the operands are integers @@ -73,8 +85,19 @@ def handle_binary_op_impl(func, rval, builder, local_sym_tab): raise SyntaxError("Unsupported binary operation") -def handle_binary_op(func, rval, builder, var_name, local_sym_tab): - result = handle_binary_op_impl(func, rval, builder, local_sym_tab) +def handle_binary_op( + func, + module, + rval, + builder, + var_name, + local_sym_tab, + map_sym_tab, + structs_sym_tab=None, +): + result = handle_binary_op_impl( + func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab + ) if var_name and var_name in local_sym_tab: logger.info( f"Storing result {result} into variable {local_sym_tab[var_name].var}" diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 8a5b6089..6f794065 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -402,7 +402,16 @@ def eval_expr( elif isinstance(expr, ast.BinOp): from pythonbpf.binary_ops import handle_binary_op - return handle_binary_op(func, expr, builder, None, local_sym_tab) + return handle_binary_op( + func, + module, + expr, + builder, + None, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) elif isinstance(expr, ast.Compare): return _handle_compare( func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab From 8b7b1c08a508d4d0f7c9effdf2a5af816191744b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sat, 11 Oct 2025 22:03:32 +0530 Subject: [PATCH 21/43] Add struct_and_helper_binops passing test for assignments --- 
.../assign/struct_and_helper_binops.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tests/passing_tests/assign/struct_and_helper_binops.py diff --git a/tests/passing_tests/assign/struct_and_helper_binops.py b/tests/passing_tests/assign/struct_and_helper_binops.py new file mode 100644 index 00000000..7e75de6e --- /dev/null +++ b/tests/passing_tests/assign/struct_and_helper_binops.py @@ -0,0 +1,40 @@ +from pythonbpf import bpf, section, bpfglobal, compile, struct +from ctypes import c_void_p, c_int64, c_uint64 +from pythonbpf.helper import ktime + + +@bpf +@struct +class data_t: + pid: c_uint64 + ts: c_uint64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + dat = data_t() + dat.pid = 123 + dat.pid = dat.pid + 1 + print(f"pid is {dat.pid}") + x = ktime() - 121 + print(f"ktime is {x}") + x = 1 + x = x + 1 + print(f"x is {x}") + if x == 2: + jat = data_t() + jat.ts = 456 + print(f"Hello, World!, ts is {jat.ts}") + else: + print("Goodbye, World!") + return + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From 8776d7607f6f024c5ba2ec48548854ecfb572348 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 00:17:10 +0530 Subject: [PATCH 22/43] Add count_temps_in_call to call scratch space needed in a helper call --- pythonbpf/functions/functions_pass.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 41d40481..264ce935 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -383,6 +383,33 @@ def process_stmt( return did_return +def count_temps_in_call(call_node): + """Count the number of temporary variables needed for a function call.""" + + count = 0 + is_helper = False + + if isinstance(call_node.func, ast.Name): + if HelperHandlerRegistry.has_handler(call_node.func.id): + is_helper = 
True + elif isinstance(call_node.func, ast.Attribute): + if HelperHandlerRegistry.has_handler(call_node.func.attr): + is_helper = True + + if not is_helper: + return 0 + + for arg in call_node.args: + if ( + isinstance(arg, ast.BinOp) + or isinstance(arg, ast.Constant) + or isinstance(arg, ast.UnaryOp) + ): + count += 1 + + return count + + def allocate_mem( module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): From 321415fa283e2bdb44ff8c7b847e9b1164404383 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 00:33:07 +0530 Subject: [PATCH 23/43] Add update_max_temps_for_stmt in allocate_mem --- pythonbpf/functions/functions_pass.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 264ce935..07fc9d41 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -414,6 +414,16 @@ def allocate_mem( module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): double_alloc = False + max_temps_needed = 0 + + def update_max_temps_for_stmt(stmt): + nonlocal max_temps_needed + + for node in ast.walk(stmt): + if isinstance(node, ast.Call): + temps_needed = count_temps_in_call(node) + max_temps_needed = max(max_temps_needed, temps_needed) + for stmt in body: has_metadata = False if isinstance(stmt, ast.If): From 6bce29b90f397c1b8a1539550c3cce5420cbccf0 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 00:37:57 +0530 Subject: [PATCH 24/43] Allocate scratch space for temp vars at the end of allocate_mem --- pythonbpf/functions/functions_pass.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 07fc9d41..1d23d479 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -425,6 +425,7 @@ def 
update_max_temps_for_stmt(stmt): max_temps_needed = max(max_temps_needed, temps_needed) for stmt in body: + update_max_temps_for_stmt(stmt) has_metadata = False if isinstance(stmt, ast.If): if stmt.body: @@ -545,6 +546,13 @@ def update_max_temps_for_stmt(stmt): if double_alloc: local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) + + logger.info(f"Temporary scratch space needed for calls: {max_temps_needed}") + for i in range(max_temps_needed): + temp_var = builder.alloca(ir.IntType(64), name=f"__helper_temp_{i}") + temp_var.align = 8 + local_sym_tab[f"__helper_temp_{i}"] = LocalSymbol(temp_var, ir.IntType(64)) + return local_sym_tab From 5dcf670f493026b68e8fcd40f5dd05b76fb99762 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 01:47:11 +0530 Subject: [PATCH 25/43] Add ScratchPoolManager and it's singleton --- pythonbpf/helper/helper_utils.py | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 7b46a40b..077734e7 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -34,6 +34,44 @@ def has_handler(cls, helper_name): return helper_name in cls._handlers +class ScratchPoolManager: + """Manage the temporary helper variables in local_sym_tab""" + + def __init__(self): + self._counter = 0 + + @property + def counter(self): + return self._counter + + def reset(self): + self._counter = 0 + logger.debug("Scratch pool counter reset to 0") + + def get_next_temp(self, local_sym_tab): + temp_name = f"__helper_temp_{self._counter}" + self._counter += 1 + + if temp_name not in local_sym_tab: + raise ValueError( + f"Scratch pool exhausted or inadequate: {temp_name}. 
" + f"Current counter: {self._counter}" + ) + + +_temp_pool_manager = ScratchPoolManager() # Singleton instance + + +def reset_scratch_pool(): + """Reset the scratch pool counter""" + _temp_pool_manager.reset() + + +def get_next_scratch_temp(local_sym_tab): + """Get the next temporary variable name from the scratch pool""" + return _temp_pool_manager.get_next_temp(local_sym_tab) + + def get_var_ptr_from_name(var_name, local_sym_tab): """Get a pointer to a variable from the symbol table.""" if local_sym_tab and var_name in local_sym_tab: From 207f714027777784bcdcc3705be59b2113565161 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 04:17:37 +0530 Subject: [PATCH 26/43] Use scratch space to store consts passed to helpers --- pythonbpf/functions/functions_pass.py | 7 ++++++- pythonbpf/helper/__init__.py | 3 ++- pythonbpf/helper/helper_utils.py | 20 +++++++++----------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 1d23d479..64acad4b 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -4,7 +4,11 @@ from typing import Any from dataclasses import dataclass -from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call +from pythonbpf.helper import ( + HelperHandlerRegistry, + handle_helper_call, + reset_scratch_pool, +) from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.binary_ops import handle_binary_op from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool @@ -353,6 +357,7 @@ def process_stmt( ret_type=ir.IntType(64), ): logger.info(f"Processing statement: {ast.dump(stmt)}") + reset_scratch_pool() if isinstance(stmt, ast.Expr): handle_expr( func, diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index a7ad1697..007724f7 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,9 +1,10 @@ -from .helper_utils import 
HelperHandlerRegistry +from .helper_utils import HelperHandlerRegistry, reset_scratch_pool from .bpf_helper_handler import handle_helper_call from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS __all__ = [ "HelperHandlerRegistry", + "reset_scratch_pool", "handle_helper_call", "ktime", "pid", diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 077734e7..2874668d 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -58,6 +58,8 @@ def get_next_temp(self, local_sym_tab): f"Current counter: {self._counter}" ) + return local_sym_tab[temp_name].var, temp_name + _temp_pool_manager = ScratchPoolManager() # Singleton instance @@ -67,11 +69,6 @@ def reset_scratch_pool(): _temp_pool_manager.reset() -def get_next_scratch_temp(local_sym_tab): - """Get the next temporary variable name from the scratch pool""" - return _temp_pool_manager.get_next_temp(local_sym_tab) - - def get_var_ptr_from_name(var_name, local_sym_tab): """Get a pointer to a variable from the symbol table.""" if local_sym_tab and var_name in local_sym_tab: @@ -79,13 +76,14 @@ def get_var_ptr_from_name(var_name, local_sym_tab): raise ValueError(f"Variable '{var_name}' not found in local symbol table") -def create_int_constant_ptr(value, builder, int_width=64): +def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64): """Create a pointer to an integer constant.""" + # Default to 64-bit integer - int_type = ir.IntType(int_width) - ptr = builder.alloca(int_type) - ptr.align = int_type.width // 8 - builder.store(ir.Constant(int_type, value), ptr) + ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) + logger.debug(f"Using temp variable '{temp_name}' for int constant {value}") + const_val = ir.Constant(ir.IntType(int_width), value) + builder.store(const_val, ptr) return ptr @@ -95,7 +93,7 @@ def get_or_create_ptr_from_arg(arg, builder, local_sym_tab): if isinstance(arg, ast.Name): ptr = 
get_var_ptr_from_name(arg.id, local_sym_tab) elif isinstance(arg, ast.Constant) and isinstance(arg.value, int): - ptr = create_int_constant_ptr(arg.value, builder) + ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab) else: raise NotImplementedError( "Only simple variable names are supported as args in map helpers." From cd74e896cff9048645d39720c760ea63ff28ae5f Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 04:20:46 +0530 Subject: [PATCH 27/43] Allow binops as args to helpers accepting int* --- pythonbpf/helper/helper_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 2874668d..53198fdf 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -94,6 +94,25 @@ def get_or_create_ptr_from_arg(arg, builder, local_sym_tab): ptr = get_var_ptr_from_name(arg.id, local_sym_tab) elif isinstance(arg, ast.Constant) and isinstance(arg.value, int): ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab) + elif isinstance(arg, ast.BinOp): + # Evaluate the expression and store the result in a temp variable + val, _ = eval_expr( + None, + None, + builder, + arg, + local_sym_tab, + None, + None, + ) + if val is None: + raise ValueError("Failed to evaluate expression for helper arg.") + + # NOTE: We assume the result is an int64 for now + ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) + logger.debug(f"Using temp variable '{temp_name}' for expression result") + builder.store(val, ptr) + else: raise NotImplementedError( "Only simple variable names are supported as args in map helpers." 
From d66e6a6aff4f0c8077125ab845dc7a87062b37b1 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 06:00:50 +0530 Subject: [PATCH 28/43] Allow struct members as helper args --- pythonbpf/helper/bpf_helper_handler.py | 16 ++++++++++++---- pythonbpf/helper/helper_utils.py | 16 +++++++--------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index f5ae9a0a..0dd2ba4e 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -64,7 +64,9 @@ def bpf_map_lookup_elem_emitter( raise ValueError( f"Map lookup expects exactly one argument (key), got {len(call.args)}" ) - key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab) + key_ptr = get_or_create_ptr_from_arg( + func, module, call.args[0], builder, local_sym_tab, struct_sym_tab + ) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) fn_type = ir.FunctionType( @@ -152,8 +154,12 @@ def bpf_map_update_elem_emitter( value_arg = call.args[1] flags_arg = call.args[2] if len(call.args) > 2 else None - key_ptr = get_or_create_ptr_from_arg(key_arg, builder, local_sym_tab) - value_ptr = get_or_create_ptr_from_arg(value_arg, builder, local_sym_tab) + key_ptr = get_or_create_ptr_from_arg( + func, module, key_arg, builder, local_sym_tab, struct_sym_tab + ) + value_ptr = get_or_create_ptr_from_arg( + func, module, value_arg, builder, local_sym_tab, struct_sym_tab + ) flags_val = get_flags_val(flags_arg, builder, local_sym_tab) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) @@ -197,7 +203,9 @@ def bpf_map_delete_elem_emitter( raise ValueError( f"Map delete expects exactly one argument (key), got {len(call.args)}" ) - key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab) + key_ptr = get_or_create_ptr_from_arg( + func, module, call.args[0], builder, local_sym_tab, struct_sym_tab + ) map_void_ptr = builder.bitcast(map_ptr, 
ir.PointerType()) # Define function type for bpf_map_delete_elem diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 53198fdf..c12e56b5 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -87,23 +87,25 @@ def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64): return ptr -def get_or_create_ptr_from_arg(arg, builder, local_sym_tab): +def get_or_create_ptr_from_arg( + func, module, arg, builder, local_sym_tab, struct_sym_tab=None +): """Extract or create pointer from the call arguments.""" if isinstance(arg, ast.Name): ptr = get_var_ptr_from_name(arg.id, local_sym_tab) elif isinstance(arg, ast.Constant) and isinstance(arg.value, int): ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab) - elif isinstance(arg, ast.BinOp): + else: # Evaluate the expression and store the result in a temp variable val, _ = eval_expr( - None, - None, + func, + module, builder, arg, local_sym_tab, None, - None, + struct_sym_tab, ) if val is None: raise ValueError("Failed to evaluate expression for helper arg.") @@ -113,10 +115,6 @@ def get_or_create_ptr_from_arg(arg, builder, local_sym_tab): logger.debug(f"Using temp variable '{temp_name}' for expression result") builder.store(val, ptr) - else: - raise NotImplementedError( - "Only simple variable names are supported as args in map helpers." 
- ) return ptr From 2cf68f64735e48d0c90faa109842956f3da9f221 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 07:57:55 +0530 Subject: [PATCH 29/43] Allow map-based helpers to be used as helper args / within binops which are helper args --- pythonbpf/binary_ops.py | 4 +++ pythonbpf/functions/functions_pass.py | 38 ++++++++++++++++++++------ pythonbpf/helper/bpf_helper_handler.py | 22 +++++++++++---- pythonbpf/helper/helper_utils.py | 10 ++++--- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index a5b8dbeb..6ea534b3 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -39,6 +39,10 @@ def get_operand_value( if res is None: raise ValueError(f"Failed to evaluate call expression: {operand}") val, _ = res + logger.info(f"Evaluated expr to {val} of type {val.type}") + base_type, depth = get_base_type_and_depth(val.type) + if depth > 0: + val = deref_to_depth(func, builder, val, depth) return val raise TypeError(f"Unsupported operand type: {type(operand)}") diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 64acad4b..ae3f94be 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -388,14 +388,18 @@ def process_stmt( return did_return -def count_temps_in_call(call_node): +def count_temps_in_call(call_node, local_sym_tab): """Count the number of temporary variables needed for a function call.""" count = 0 is_helper = False + # NOTE: We exclude print calls for now if isinstance(call_node.func, ast.Name): - if HelperHandlerRegistry.has_handler(call_node.func.id): + if ( + HelperHandlerRegistry.has_handler(call_node.func.id) + and call_node.func.id != "print" + ): is_helper = True elif isinstance(call_node.func, ast.Attribute): if HelperHandlerRegistry.has_handler(call_node.func.attr): @@ -405,10 +409,11 @@ def count_temps_in_call(call_node): return 0 for arg in call_node.args: - if 
( - isinstance(arg, ast.BinOp) - or isinstance(arg, ast.Constant) - or isinstance(arg, ast.UnaryOp) + # NOTE: Count all non-name arguments + # For struct fields, if it is being passed as an argument, + # The struct object should already exist in the local_sym_tab + if not isinstance(arg, ast.Name) and not ( + isinstance(arg, ast.Attribute) and arg.value.id in local_sym_tab ): count += 1 @@ -423,11 +428,19 @@ def allocate_mem( def update_max_temps_for_stmt(stmt): nonlocal max_temps_needed + temps_needed = 0 + + if isinstance(stmt, ast.If): + for s in stmt.body: + update_max_temps_for_stmt(s) + for s in stmt.orelse: + update_max_temps_for_stmt(s) + return for node in ast.walk(stmt): if isinstance(node, ast.Call): - temps_needed = count_temps_in_call(node) - max_temps_needed = max(max_temps_needed, temps_needed) + temps_needed += count_temps_in_call(node, local_sym_tab) + max_temps_needed = max(max_temps_needed, temps_needed) for stmt in body: update_max_temps_for_stmt(stmt) @@ -460,9 +473,16 @@ def update_max_temps_for_stmt(stmt): logger.info("Unsupported multiassignment") continue target = stmt.targets[0] - if not isinstance(target, ast.Name): + if not isinstance(target, ast.Name) and not isinstance( + target, ast.Attribute + ): logger.info("Unsupported assignment target") continue + if isinstance(target, ast.Attribute): + logger.info( + f"Struct field {target.attr} assignment, will be handled later" + ) + continue var_name = target.id rval = stmt.value if var_name in local_sym_tab: diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index 0dd2ba4e..44731d71 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -34,6 +34,7 @@ def bpf_ktime_get_ns_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """ Emit LLVM IR for bpf_ktime_get_ns helper function call. 
@@ -56,6 +57,7 @@ def bpf_map_lookup_elem_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """ Emit LLVM IR for bpf_map_lookup_elem helper function call. @@ -65,12 +67,16 @@ def bpf_map_lookup_elem_emitter( f"Map lookup expects exactly one argument (key), got {len(call.args)}" ) key_ptr = get_or_create_ptr_from_arg( - func, module, call.args[0], builder, local_sym_tab, struct_sym_tab + func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab ) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) + # TODO: I have changed the return typr to i64*, as we are + # allocating space for that type in allocate_mem. This is + # temporary, and we will honour other widths later. But this + # allows us to have cool binary ops on the returned value. fn_type = ir.FunctionType( - ir.PointerType(), # Return type: void* + ir.PointerType(ir.IntType(64)), # Return type: void* [ir.PointerType(), ir.PointerType()], # Args: (void*, void*) var_arg=False, ) @@ -93,6 +99,7 @@ def bpf_printk_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """Emit LLVM IR for bpf_printk helper function call.""" if not hasattr(func, "_fmt_counter"): @@ -140,6 +147,7 @@ def bpf_map_update_elem_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """ Emit LLVM IR for bpf_map_update_elem helper function call. 
@@ -155,10 +163,10 @@ def bpf_map_update_elem_emitter( flags_arg = call.args[2] if len(call.args) > 2 else None key_ptr = get_or_create_ptr_from_arg( - func, module, key_arg, builder, local_sym_tab, struct_sym_tab + func, module, key_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab ) value_ptr = get_or_create_ptr_from_arg( - func, module, value_arg, builder, local_sym_tab, struct_sym_tab + func, module, value_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab ) flags_val = get_flags_val(flags_arg, builder, local_sym_tab) @@ -194,6 +202,7 @@ def bpf_map_delete_elem_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """ Emit LLVM IR for bpf_map_delete_elem helper function call. @@ -204,7 +213,7 @@ def bpf_map_delete_elem_emitter( f"Map delete expects exactly one argument (key), got {len(call.args)}" ) key_ptr = get_or_create_ptr_from_arg( - func, module, call.args[0], builder, local_sym_tab, struct_sym_tab + func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab ) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) @@ -233,6 +242,7 @@ def bpf_get_current_pid_tgid_emitter( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): """ Emit LLVM IR for bpf_get_current_pid_tgid helper function call. 
@@ -259,6 +269,7 @@ def bpf_perf_event_output_handler( func, local_sym_tab=None, struct_sym_tab=None, + map_sym_tab=None, ): if len(call.args) != 1: raise ValueError( @@ -323,6 +334,7 @@ def invoke_helper(method_name, map_ptr=None): func, local_sym_tab, struct_sym_tab, + map_sym_tab, ) # Handle direct function calls (e.g., print(), ktime()) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index c12e56b5..5960c9ac 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -81,14 +81,14 @@ def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64): # Default to 64-bit integer ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) - logger.debug(f"Using temp variable '{temp_name}' for int constant {value}") + logger.info(f"Using temp variable '{temp_name}' for int constant {value}") const_val = ir.Constant(ir.IntType(int_width), value) builder.store(const_val, ptr) return ptr def get_or_create_ptr_from_arg( - func, module, arg, builder, local_sym_tab, struct_sym_tab=None + func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab=None ): """Extract or create pointer from the call arguments.""" @@ -104,15 +104,17 @@ def get_or_create_ptr_from_arg( builder, arg, local_sym_tab, - None, + map_sym_tab, struct_sym_tab, ) if val is None: raise ValueError("Failed to evaluate expression for helper arg.") # NOTE: We assume the result is an int64 for now + # if isinstance(arg, ast.Attribute): + # return val ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) - logger.debug(f"Using temp variable '{temp_name}' for expression result") + logger.info(f"Using temp variable '{temp_name}' for expression result") builder.store(val, ptr) return ptr From 4e33fd4a32b10d0b23a029ba31e05b5e8388c3eb Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 09:11:56 +0530 Subject: [PATCH 30/43] Add negation UnaryOp --- pythonbpf/expr/expr_pass.py | 15 +++++++++++---- 1 
file changed, 11 insertions(+), 4 deletions(-) diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 6f794065..bbb7277d 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -176,7 +176,7 @@ def _handle_unary_op( structs_sym_tab=None, ): """Handle ast.UnaryOp expressions.""" - if not isinstance(expr.op, ast.Not): + if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub): logger.error("Only 'not' unary operator is supported") return None @@ -188,9 +188,16 @@ def _handle_unary_op( return None operand_val, operand_type = operand - true_const = ir.Constant(ir.IntType(1), 1) - result = builder.xor(convert_to_bool(builder, operand_val), true_const) - return result, ir.IntType(1) + + if isinstance(expr.op, ast.Not): + true_const = ir.Constant(ir.IntType(1), 1) + result = builder.xor(convert_to_bool(builder, operand_val), true_const) + return result, ir.IntType(1) + elif isinstance(expr.op, ast.USub): + # Multiply by -1 + neg_one = ir.Constant(ir.IntType(64), -1) + result = builder.mul(operand_val, neg_one) + return result, ir.IntType(64) def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab): From a3b4d09652e5a075c458249ab0598c5e856d13da Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 09:13:04 +0530 Subject: [PATCH 31/43] Fix errorstring in _handle_unary_op --- pythonbpf/expr/expr_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index bbb7277d..e0e0fed3 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -177,7 +177,7 @@ def _handle_unary_op( ): """Handle ast.UnaryOp expressions.""" if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub): - logger.error("Only 'not' unary operator is supported") + logger.error("Only 'not' and '-' unary operators are supported") return None operand = eval_expr( From 
e8026a13bf7166c44423fe49233d7faeb13353f7 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 09:30:37 +0530 Subject: [PATCH 32/43] Allow helpers to be called within themselves --- pythonbpf/expr/expr_pass.py | 12 ++++++------ pythonbpf/helper/helper_utils.py | 11 +++-------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index e0e0fed3..ecf11192 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -180,23 +180,23 @@ def _handle_unary_op( logger.error("Only 'not' and '-' unary operators are supported") return None - operand = eval_expr( - func, module, builder, expr.operand, local_sym_tab, map_sym_tab, structs_sym_tab + from pythonbpf.binary_ops import get_operand_value + + operand = get_operand_value( + func, module, expr.operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab ) if operand is None: logger.error("Failed to evaluate operand for unary operation") return None - operand_val, operand_type = operand - if isinstance(expr.op, ast.Not): true_const = ir.Constant(ir.IntType(1), 1) - result = builder.xor(convert_to_bool(builder, operand_val), true_const) + result = builder.xor(convert_to_bool(builder, operand), true_const) return result, ir.IntType(1) elif isinstance(expr.op, ast.USub): # Multiply by -1 neg_one = ir.Constant(ir.IntType(64), -1) - result = builder.mul(operand_val, neg_one) + result = builder.mul(operand, neg_one) return result, ir.IntType(64) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 5960c9ac..284aa686 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -4,6 +4,7 @@ from llvmlite import ir from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth +from pythonbpf.binary_ops import get_operand_value logger = logging.getLogger(__name__) @@ -98,14 +99,8 @@ def get_or_create_ptr_from_arg( ptr = 
create_int_constant_ptr(arg.value, builder, local_sym_tab) else: # Evaluate the expression and store the result in a temp variable - val, _ = eval_expr( - func, - module, - builder, - arg, - local_sym_tab, - map_sym_tab, - struct_sym_tab, + val = get_operand_value( + func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab ) if val is None: raise ValueError("Failed to evaluate expression for helper arg.") From fa82dc7ebd87456104c06a2161b39c7d5332bddc Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 09:39:33 +0530 Subject: [PATCH 33/43] Add comprehensive passing test for assignment --- tests/passing_tests/assign/comprehensive.py | 68 +++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/passing_tests/assign/comprehensive.py diff --git a/tests/passing_tests/assign/comprehensive.py b/tests/passing_tests/assign/comprehensive.py new file mode 100644 index 00000000..9c157ccc --- /dev/null +++ b/tests/passing_tests/assign/comprehensive.py @@ -0,0 +1,68 @@ +from pythonbpf import bpf, map, section, bpfglobal, compile, struct +from ctypes import c_void_p, c_int64, c_int32, c_uint64 +from pythonbpf.maps import HashMap +from pythonbpf.helper import ktime + + +# NOTE: This is a comprehensive test combining struct, helper, and map features +# Please note that at line 50, though we have used an absurd expression to test +# the comiler, it is recommended to used named variables to reduce the amount of +# scratch space that needs to be allocated. 
+ +@bpf +@struct +class data_t: + pid: c_uint64 + ts: c_uint64 + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=3) + + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + dat = data_t() + dat.pid = 123 + dat.pid = dat.pid + 1 + print(f"pid is {dat.pid}") + tu = 9 + last.update(0, tu) + last.update(1, -last.lookup(0)) + x = last.lookup(0) + print(f"Map value at index 0: {x}") + x = x + c_int32(1) + print(f"x after adding 32-bit 1 is {x}") + x = ktime() - 121 + print(f"ktime - 121 is {x}") + x = last.lookup(0) + x = x + 1 + print(f"x is {x}") + if x == 11: + jat = data_t() + jat.ts = 456 + print(f"Hello, World!, ts is {jat.ts}") + a = last.lookup(0) + print(f"a is {a}") + last.update(0, last.lookup(last.lookup(0)) + + last.lookup(last.lookup(0)) + last.lookup(last.lookup(0))) + z = last.lookup(0) + print(f"new map val at index 0 is {z}") + else: + a = last.lookup(0) + print("Goodbye, World!") + c = last.lookup(1 - 1) + print(f"c is {c}") + return + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From b93f704eb83b2fa6856f7c77a4cae3cda73ce4f5 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 09:46:16 +0530 Subject: [PATCH 34/43] Tweak the comprehensive assignment test --- tests/passing_tests/assign/comprehensive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/passing_tests/assign/comprehensive.py b/tests/passing_tests/assign/comprehensive.py index 9c157ccc..b73b0877 100644 --- a/tests/passing_tests/assign/comprehensive.py +++ b/tests/passing_tests/assign/comprehensive.py @@ -41,7 +41,7 @@ def hello_world(ctx: c_void_p) -> c_int64: x = last.lookup(0) x = x + 1 print(f"x is {x}") - if x == 11: + if x == 10: jat = data_t() jat.ts = 456 print(f"Hello, World!, ts is {jat.ts}") From 933d2a5c77a3c24bd1b4dbfc76a217c02ccd48c6 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 
09:47:57 +0530 Subject: [PATCH 35/43] Fix comprehensive assignment test --- tests/passing_tests/assign/comprehensive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/passing_tests/assign/comprehensive.py b/tests/passing_tests/assign/comprehensive.py index b73b0877..625fb81d 100644 --- a/tests/passing_tests/assign/comprehensive.py +++ b/tests/passing_tests/assign/comprehensive.py @@ -47,6 +47,7 @@ def hello_world(ctx: c_void_p) -> c_int64: print(f"Hello, World!, ts is {jat.ts}") a = last.lookup(0) print(f"a is {a}") + last.update(9, 9) last.update(0, last.lookup(last.lookup(0)) + last.lookup(last.lookup(0)) + last.lookup(last.lookup(0))) z = last.lookup(0) From 105c5a7bd04bb7ff0adc9b15ea82d8ee1eb33661 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 10:12:45 +0530 Subject: [PATCH 36/43] Cleanup handle_assign --- pythonbpf/functions/functions_pass.py | 212 ++++---------------------- 1 file changed, 33 insertions(+), 179 deletions(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index ae3f94be..23317b77 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -6,11 +6,9 @@ from pythonbpf.helper import ( HelperHandlerRegistry, - handle_helper_call, reset_scratch_pool, ) from pythonbpf.type_deducer import ctypes_to_ir -from pythonbpf.binary_ops import handle_binary_op from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool from pythonbpf.assign_pass import handle_variable_assignment @@ -80,191 +78,47 @@ def handle_assign( logger.error(f"Failed to handle assignment to {var_name}") return - num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64") - logger.info(f"Handling assignment to {ast.dump(target)}") if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute): logger.info("Unsupported assignment target") return var_name = target.id if isinstance(target, ast.Name) else target.value.id rval = stmt.value - if 
isinstance(target, ast.Attribute): - # struct field assignment - field_name = target.attr - if var_name in local_sym_tab: - struct_type = local_sym_tab[var_name].metadata - struct_info = structs_sym_tab[struct_type] - if field_name in struct_info.fields: - field_ptr = struct_info.gep( - builder, local_sym_tab[var_name].var, field_name - ) - val = eval_expr( - func, - module, - builder, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - if isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[ - 1 - ] == ir.PointerType(ir.IntType(8)): - # TODO: Figure it out, not a priority rn - # Special case for string assignment to char array - # str_len = struct_info["field_types"][field_idx].count - # assign_string_to_array(builder, field_ptr, val[0], str_len) - # print(f"Assigned to struct field {var_name}.{field_name}") - pass - if val is None: - logger.info("Failed to evaluate struct field assignment") - return - logger.info(field_ptr) - builder.store(val[0], field_ptr) - logger.info(f"Assigned to struct field {var_name}.{field_name}") - return - elif isinstance(rval, ast.Constant): - if isinstance(rval.value, bool): - if rval.value: - builder.store( - ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name].var - ) - else: - builder.store( - ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name].var - ) - logger.info(f"Assigned constant {rval.value} to {var_name}") - elif isinstance(rval.value, int): - # Assume c_int64 for now - # var = builder.alloca(ir.IntType(64), name=var_name) - # var.align = 8 - builder.store( - ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name].var - ) - logger.info(f"Assigned constant {rval.value} to {var_name}") - elif isinstance(rval.value, str): - str_val = rval.value.encode("utf-8") + b"\x00" - str_const = ir.Constant( - ir.ArrayType(ir.IntType(8), len(str_val)), bytearray(str_val) + + # struct field assignment + field_name = target.attr + if var_name in local_sym_tab: + struct_type = 
local_sym_tab[var_name].metadata + struct_info = structs_sym_tab[struct_type] + if field_name in struct_info.fields: + field_ptr = struct_info.gep( + builder, local_sym_tab[var_name].var, field_name ) - global_str = ir.GlobalVariable( - module, str_const.type, name=f"{var_name}_str" + val = eval_expr( + func, + module, + builder, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, ) - global_str.linkage = "internal" - global_str.global_constant = True - global_str.initializer = str_const - str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8))) - builder.store(str_ptr, local_sym_tab[var_name].var) - logger.info(f"Assigned string constant '{rval.value}' to {var_name}") - else: - logger.info("Unsupported constant type") - elif isinstance(rval, ast.Call): - if isinstance(rval.func, ast.Name): - call_type = rval.func.id - logger.info(f"Assignment call type: {call_type}") - if ( - call_type in num_types - and len(rval.args) == 1 - and isinstance(rval.args[0], ast.Constant) - and isinstance(rval.args[0].value, int) - ): - ir_type = ctypes_to_ir(call_type) - # var = builder.alloca(ir_type, name=var_name) - # var.align = ir_type.width // 8 - builder.store( - ir.Constant(ir_type, rval.args[0].value), - local_sym_tab[var_name].var, - ) - logger.info( - f"Assigned {call_type} constant {rval.args[0].value} to {var_name}" - ) - elif HelperHandlerRegistry.has_handler(call_type): - # var = builder.alloca(ir.IntType(64), name=var_name) - # var.align = 8 - val = handle_helper_call( - rval, - module, - builder, - func, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - builder.store(val[0], local_sym_tab[var_name].var) - logger.info(f"Assigned constant {rval.func.id} to {var_name}") - elif call_type == "deref" and len(rval.args) == 1: - logger.info(f"Handling deref assignment {ast.dump(rval)}") - val = eval_expr( - func, - module, - builder, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - if val is None: - logger.info("Failed to evaluate 
deref argument") - return - logger.info(f"Dereferenced value: {val}, storing in {var_name}") - builder.store(val[0], local_sym_tab[var_name].var) - logger.info(f"Dereferenced and assigned to {var_name}") - elif call_type in structs_sym_tab and len(rval.args) == 0: - struct_info = structs_sym_tab[call_type] - ir_type = struct_info.ir_type - # var = builder.alloca(ir_type, name=var_name) - # Null init - builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name].var) - logger.info(f"Assigned struct {call_type} to {var_name}") - else: - logger.info(f"Unsupported assignment call type: {call_type}") - elif isinstance(rval.func, ast.Attribute): - logger.info(f"Assignment call attribute: {ast.dump(rval.func)}") - if isinstance(rval.func.value, ast.Name): - if rval.func.value.id in map_sym_tab: - map_name = rval.func.value.id - method_name = rval.func.attr - if HelperHandlerRegistry.has_handler(method_name): - val = handle_helper_call( - rval, - module, - builder, - func, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - builder.store(val[0], local_sym_tab[var_name].var) - else: - # TODO: probably a struct access - logger.info(f"TODO STRUCT ACCESS {ast.dump(rval)}") - elif isinstance(rval.func.value, ast.Call) and isinstance( - rval.func.value.func, ast.Name - ): - map_name = rval.func.value.func.id - method_name = rval.func.attr - if map_name in map_sym_tab: - if HelperHandlerRegistry.has_handler(method_name): - val = handle_helper_call( - rval, - module, - builder, - func, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - # var = builder.alloca(ir.IntType(64), name=var_name) - # var.align = 8 - builder.store(val[0], local_sym_tab[var_name].var) - else: - logger.info("Unsupported assignment call structure") - else: - logger.info("Unsupported assignment call function type") - elif isinstance(rval, ast.BinOp): - handle_binary_op(rval, builder, var_name, local_sym_tab) - else: - logger.info("Unsupported assignment value type") + if 
isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[ + 1 + ] == ir.PointerType(ir.IntType(8)): + # TODO: Figure it out, not a priority rn + # Special case for string assignment to char array + # str_len = struct_info["field_types"][field_idx].count + # assign_string_to_array(builder, field_ptr, val[0], str_len) + # print(f"Assigned to struct field {var_name}.{field_name}") + pass + if val is None: + logger.info("Failed to evaluate struct field assignment") + return + logger.info(field_ptr) + builder.store(val[0], field_ptr) + logger.info(f"Assigned to struct field {var_name}.{field_name}") + return def handle_cond( From 3ad1b73c5a9f99a094da3bf522aa30feb0960adc Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 10:19:52 +0530 Subject: [PATCH 37/43] Add handle_struct_field_assignment to assign_pass --- pythonbpf/assign_pass.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 969870d0..3cad93e9 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -6,6 +6,46 @@ logger = logging.getLogger(__name__) +def handle_struct_field_assignment( + func, module, builder, target, rval, local_sym_tab, structs_sym_tab +): + """Handle struct field assignment (obj.field = value).""" + + var_name = target.value.id + field_name = target.attr + + if var_name not in local_sym_tab: + logger.error(f"Variable '{var_name}' not found in symbol table") + return + + struct_type = local_sym_tab[var_name].metadata + struct_info = structs_sym_tab[struct_type] + + if field_name not in struct_info.fields: + logger.error(f"Field '{field_name}' not found in struct '{struct_type}'") + return + + # Get field pointer and evaluate value + field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name) + val = eval_expr(func, module, builder, rval, local_sym_tab, None, structs_sym_tab) + + if val is None: + logger.error(f"Failed to 
evaluate value for {var_name}.{field_name}") + return + + # TODO: Handle string assignment to char array (not a priority) + field_type = struct_info.field_type(field_name) + if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)): + logger.warning( + f"String to char array assignment not implemented for {var_name}.{field_name}" + ) + return + + # Store the value + builder.store(val[0], field_ptr) + logger.info(f"Assigned to struct field {var_name}.{field_name}") + + def handle_variable_assignment( func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab ): From 64e44d0d58bd3de375b6ea0d45511953ae45d9bc Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 10:30:46 +0530 Subject: [PATCH 38/43] Use handle_struct_field_assignment in handle_assign --- pythonbpf/functions/functions_pass.py | 51 ++++++--------------------- 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 23317b77..4a91333c 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -10,7 +10,10 @@ ) from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool -from pythonbpf.assign_pass import handle_variable_assignment +from pythonbpf.assign_pass import ( + handle_variable_assignment, + handle_struct_field_assignment, +) from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name @@ -78,47 +81,15 @@ def handle_assign( logger.error(f"Failed to handle assignment to {var_name}") return - logger.info(f"Handling assignment to {ast.dump(target)}") - if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute): - logger.info("Unsupported assignment target") + if isinstance(target, ast.Attribute): + # NOTE: Struct field assignment case: pkt.field = value + handle_struct_field_assignment( + func, module, builder, 
target, rval, local_sym_tab, structs_sym_tab + ) return - var_name = target.id if isinstance(target, ast.Name) else target.value.id - rval = stmt.value - # struct field assignment - field_name = target.attr - if var_name in local_sym_tab: - struct_type = local_sym_tab[var_name].metadata - struct_info = structs_sym_tab[struct_type] - if field_name in struct_info.fields: - field_ptr = struct_info.gep( - builder, local_sym_tab[var_name].var, field_name - ) - val = eval_expr( - func, - module, - builder, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - if isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[ - 1 - ] == ir.PointerType(ir.IntType(8)): - # TODO: Figure it out, not a priority rn - # Special case for string assignment to char array - # str_len = struct_info["field_types"][field_idx].count - # assign_string_to_array(builder, field_ptr, val[0], str_len) - # print(f"Assigned to struct field {var_name}.{field_name}") - pass - if val is None: - logger.info("Failed to evaluate struct field assignment") - return - logger.info(field_ptr) - builder.store(val[0], field_ptr) - logger.info(f"Assigned to struct field {var_name}.{field_name}") - return + # Unsupported target type + logger.error(f"Unsupported assignment target: {ast.dump(target)}") def handle_cond( From 08c0ccf0ac23a3006fa675de359bc27cb4152c8e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 10:37:20 +0530 Subject: [PATCH 39/43] Pass map_sym_tab to handle_struct_field_assign --- pythonbpf/assign_pass.py | 6 ++++-- pythonbpf/functions/functions_pass.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 3cad93e9..ab091415 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -7,7 +7,7 @@ def handle_struct_field_assignment( - func, module, builder, target, rval, local_sym_tab, structs_sym_tab + func, module, builder, target, rval, local_sym_tab, 
map_sym_tab, structs_sym_tab ): """Handle struct field assignment (obj.field = value).""" @@ -27,7 +27,9 @@ def handle_struct_field_assignment( # Get field pointer and evaluate value field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name) - val = eval_expr(func, module, builder, rval, local_sym_tab, None, structs_sym_tab) + val = eval_expr( + func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab + ) if val is None: logger.error(f"Failed to evaluate value for {var_name}.{field_name}") diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 4a91333c..68314f1e 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -84,7 +84,14 @@ def handle_assign( if isinstance(target, ast.Attribute): # NOTE: Struct field assignment case: pkt.field = value handle_struct_field_assignment( - func, module, builder, target, rval, local_sym_tab, structs_sym_tab + func, + module, + builder, + target, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, ) return From 0f6971bcc229abbe87ee2b3effa91f8219f0e068 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 11:34:40 +0530 Subject: [PATCH 40/43] Refactor allocate_mem --- pythonbpf/functions/functions_pass.py | 344 ++++++++++++++++---------- 1 file changed, 212 insertions(+), 132 deletions(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 68314f1e..a024ca5c 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -220,6 +220,203 @@ def process_stmt( return did_return +def _is_helper_call(call_node): + """Check if a call node is a BPF helper function call.""" + if isinstance(call_node.func, ast.Name): + # Exclude print from requiring temps (handles f-strings differently) + func_name = call_node.func.id + return HelperHandlerRegistry.has_handler(func_name) and func_name != "print" + + elif 
isinstance(call_node.func, ast.Attribute): + return HelperHandlerRegistry.has_handler(call_node.func.attr) + + return False + + +def _handle_if_allocation( + module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab +): + """Recursively handle allocations in if/else branches.""" + if stmt.body: + allocate_mem( + module, + builder, + stmt.body, + func, + ret_type, + map_sym_tab, + local_sym_tab, + structs_sym_tab, + ) + if stmt.orelse: + allocate_mem( + module, + builder, + stmt.orelse, + func, + ret_type, + map_sym_tab, + local_sym_tab, + structs_sym_tab, + ) + + +def _handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): + """Handle memory allocation for assignment statements.""" + + # Validate assignment + if len(stmt.targets) != 1: + logger.warning("Multi-target assignment not supported, skipping allocation") + return + + target = stmt.targets[0] + + # Skip non-name targets (e.g., struct field assignments) + if isinstance(target, ast.Attribute): + logger.debug(f"Struct field assignment to {target.attr}, no allocation needed") + return + + if not isinstance(target, ast.Name): + logger.warning(f"Unsupported assignment target type: {type(target).__name__}") + return + + var_name = target.id + rval = stmt.value + + # Skip if already allocated + if var_name in local_sym_tab: + logger.debug(f"Variable {var_name} already allocated, skipping") + return + + # Determine type and allocate based on rval + if isinstance(rval, ast.Call): + _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) + elif isinstance(rval, ast.Constant): + _allocate_for_constant(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.BinOp): + _allocate_for_binop(builder, var_name, local_sym_tab) + else: + logger.warning( + f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" + ) + + +def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): + """Allocate memory for variable 
assigned from a call.""" + + if isinstance(rval.func, ast.Name): + call_type = rval.func.id + + # C type constructors + if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"): + ir_type = ctypes_to_ir(call_type) + var = builder.alloca(ir_type, name=var_name) + var.align = ir_type.width // 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as {call_type}") + + # Helper functions + elif HelperHandlerRegistry.has_handler(call_type): + ir_type = ir.IntType(64) # Assume i64 return type + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for helper {call_type}") + + # Deref function + elif call_type == "deref": + ir_type = ir.IntType(64) # Assume i64 return type + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for deref") + + # Struct constructors + elif call_type in structs_sym_tab: + struct_info = structs_sym_tab[call_type] + var = builder.alloca(struct_info.ir_type, name=var_name) + local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) + logger.info(f"Pre-allocated {var_name} for struct {call_type}") + + else: + logger.warning(f"Unknown call type for allocation: {call_type}") + + elif isinstance(rval.func, ast.Attribute): + # Map method calls - need double allocation for ptr handling + _allocate_for_map_method(builder, var_name, local_sym_tab) + + else: + logger.warning(f"Unsupported call function type for {var_name}") + + +def _allocate_for_map_method(builder, var_name, local_sym_tab): + """Allocate memory for variable assigned from map method (double alloc).""" + + # Main variable (pointer to pointer) + ir_type = ir.PointerType(ir.IntType(64)) + var = builder.alloca(ir_type, name=var_name) + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + + # Temporary variable for computed 
values + tmp_ir_type = ir.IntType(64) + var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp") + local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) + + logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method") + + +def _allocate_for_constant(builder, var_name, rval, local_sym_tab): + """Allocate memory for variable assigned from a constant.""" + + if isinstance(rval.value, bool): + ir_type = ir.IntType(1) + var = builder.alloca(ir_type, name=var_name) + var.align = 1 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as bool") + + elif isinstance(rval.value, int): + ir_type = ir.IntType(64) + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as i64") + + elif isinstance(rval.value, str): + ir_type = ir.PointerType(ir.IntType(8)) + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as string") + + else: + logger.warning( + f"Unsupported constant type for {var_name}: {type(rval.value).__name__}" + ) + + +def _allocate_for_binop(builder, var_name, local_sym_tab): + """Allocate memory for variable assigned from a binary operation.""" + ir_type = ir.IntType(64) # Assume i64 result + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for binop result") + + +def _allocate_temp_pool(builder, max_temps, local_sym_tab): + """Allocate the temporary scratch space pool for helper arguments.""" + if max_temps == 0: + return + + logger.info(f"Allocating temp pool of {max_temps} variables") + for i in range(max_temps): + temp_name = f"__helper_temp_{i}" + temp_var = builder.alloca(ir.IntType(64), name=temp_name) + temp_var.align = 8 + local_sym_tab[temp_name] = LocalSymbol(temp_var, 
ir.IntType(64)) + + def count_temps_in_call(call_node, local_sym_tab): """Count the number of temporary variables needed for a function call.""" @@ -255,7 +452,6 @@ def count_temps_in_call(call_node, local_sym_tab): def allocate_mem( module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): - double_alloc = False max_temps_needed = 0 def update_max_temps_for_stmt(stmt): @@ -276,139 +472,23 @@ def update_max_temps_for_stmt(stmt): for stmt in body: update_max_temps_for_stmt(stmt) - has_metadata = False + + # Handle allocations if isinstance(stmt, ast.If): - if stmt.body: - local_sym_tab = allocate_mem( - module, - builder, - stmt.body, - func, - ret_type, - map_sym_tab, - local_sym_tab, - structs_sym_tab, - ) - if stmt.orelse: - local_sym_tab = allocate_mem( - module, - builder, - stmt.orelse, - func, - ret_type, - map_sym_tab, - local_sym_tab, - structs_sym_tab, - ) + _handle_if_allocation( + module, + builder, + stmt, + func, + ret_type, + map_sym_tab, + local_sym_tab, + structs_sym_tab, + ) elif isinstance(stmt, ast.Assign): - if len(stmt.targets) != 1: - logger.info("Unsupported multiassignment") - continue - target = stmt.targets[0] - if not isinstance(target, ast.Name) and not isinstance( - target, ast.Attribute - ): - logger.info("Unsupported assignment target") - continue - if isinstance(target, ast.Attribute): - logger.info( - f"Struct field {target.attr} assignment, will be handled later" - ) - continue - var_name = target.id - rval = stmt.value - if var_name in local_sym_tab: - logger.info(f"Variable {var_name} already allocated") - continue - if isinstance(rval, ast.Call): - if isinstance(rval.func, ast.Name): - call_type = rval.func.id - if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"): - ir_type = ctypes_to_ir(call_type) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - logger.info( - f"Pre-allocated variable {var_name} of type {call_type}" - ) - elif 
HelperHandlerRegistry.has_handler(call_type): - # Assume return type is int64 for now - ir_type = ir.IntType(64) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - logger.info(f"Pre-allocated variable {var_name} for helper") - elif call_type == "deref" and len(rval.args) == 1: - # Assume return type is int64 for now - ir_type = ir.IntType(64) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - logger.info(f"Pre-allocated variable {var_name} for deref") - elif call_type in structs_sym_tab: - struct_info = structs_sym_tab[call_type] - ir_type = struct_info.ir_type - var = builder.alloca(ir_type, name=var_name) - has_metadata = True - logger.info( - f"Pre-allocated variable {var_name} for struct {call_type}" - ) - elif isinstance(rval.func, ast.Attribute): - # Map method call - ir_type = ir.PointerType(ir.IntType(64)) - var = builder.alloca(ir_type, name=var_name) - - # declare an intermediate ptr type for map lookup - tmp_ir_type = ir.IntType(64) - var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp") - double_alloc = True - # var.align = ir_type.width // 8 - logger.info( - f"Pre-allocated variable {var_name} and {var_name}_tmp for map" - ) - else: - logger.info("Unsupported assignment call function type") - continue - elif isinstance(rval, ast.Constant): - if isinstance(rval.value, bool): - ir_type = ir.IntType(1) - var = builder.alloca(ir_type, name=var_name) - var.align = 1 - logger.info(f"Pre-allocated variable {var_name} of type c_bool") - elif isinstance(rval.value, int): - # Assume c_int64 for now - ir_type = ir.IntType(64) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - logger.info(f"Pre-allocated variable {var_name} of type c_int64") - elif isinstance(rval.value, str): - ir_type = ir.PointerType(ir.IntType(8)) - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - logger.info(f"Pre-allocated variable {var_name} of type string") - else: - 
logger.info("Unsupported constant type") - continue - elif isinstance(rval, ast.BinOp): - # Assume c_int64 for now - ir_type = ir.IntType(64) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - logger.info(f"Pre-allocated variable {var_name} of type c_int64") - else: - logger.info("Unsupported assignment value type") - continue - - if has_metadata: - local_sym_tab[var_name] = LocalSymbol(var, ir_type, call_type) - else: - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - - if double_alloc: - local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) - - logger.info(f"Temporary scratch space needed for calls: {max_temps_needed}") - for i in range(max_temps_needed): - temp_var = builder.alloca(ir.IntType(64), name=f"__helper_temp_{i}") - temp_var.align = 8 - local_sym_tab[f"__helper_temp_{i}"] = LocalSymbol(temp_var, ir.IntType(64)) + _handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab) + + _allocate_temp_pool(builder, max_temps_needed, local_sym_tab) return local_sym_tab From 2f1aaa4834d4c7ba991da40e51394d7bf92e8482 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 11:41:01 +0530 Subject: [PATCH 41/43] Fix typos --- pythonbpf/helper/bpf_helper_handler.py | 2 +- tests/passing_tests/assign/comprehensive.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index 44731d71..79cbf266 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -71,7 +71,7 @@ def bpf_map_lookup_elem_emitter( ) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) - # TODO: I have changed the return typr to i64*, as we are + # TODO: I have changed the return type to i64*, as we are # allocating space for that type in allocate_mem. This is # temporary, and we will honour other widths later. But this # allows us to have cool binary ops on the returned value. 
diff --git a/tests/passing_tests/assign/comprehensive.py b/tests/passing_tests/assign/comprehensive.py index 625fb81d..6e53a3f5 100644 --- a/tests/passing_tests/assign/comprehensive.py +++ b/tests/passing_tests/assign/comprehensive.py @@ -6,7 +6,7 @@ # NOTE: This is a comprehensive test combining struct, helper, and map features # Please note that at line 50, though we have used an absurd expression to test -# the comiler, it is recommended to used named variables to reduce the amount of +# the compiler, it is recommended to use named variables to reduce the amount of # scratch space that needs to be allocated. @bpf From 69bee5fee960ae6627f1c4b94b9eb015a4be3f4b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 12:10:09 +0530 Subject: [PATCH 42/43] Separate LocalSymbol from functions --- pythonbpf/local_symbol.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 pythonbpf/local_symbol.py diff --git a/pythonbpf/local_symbol.py b/pythonbpf/local_symbol.py new file mode 100644 index 00000000..d0f76d5f --- /dev/null +++ b/pythonbpf/local_symbol.py @@ -0,0 +1,15 @@ +from llvm import ir +from dataclasses import dataclass +from typing import Any + + +@dataclass +class LocalSymbol: + var: ir.AllocaInstr + ir_type: ir.Type + metadata: Any = None + + def __iter__(self): + yield self.var + yield self.ir_type + yield self.metadata From e0ad1bfb0ff00b4bd143a5c3e9460d31b13c78b8 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Sun, 12 Oct 2025 12:14:46 +0530 Subject: [PATCH 43/43] Move bulk of allocation logic to allocation_pass --- pythonbpf/allocation_pass.py | 191 +++++++++++++++++++++++++ pythonbpf/functions/functions_pass.py | 192 +------------------------- pythonbpf/local_symbol.py | 15 -- 3 files changed, 196 insertions(+), 202 deletions(-) create mode 100644 pythonbpf/allocation_pass.py delete mode 100644 pythonbpf/local_symbol.py diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py new file mode
100644 index 00000000..5ec631a2 --- /dev/null +++ b/pythonbpf/allocation_pass.py @@ -0,0 +1,191 @@ +import ast +import logging + +from llvmlite import ir +from dataclasses import dataclass +from typing import Any +from pythonbpf.helper import HelperHandlerRegistry +from pythonbpf.type_deducer import ctypes_to_ir + +logger = logging.getLogger(__name__) + + +@dataclass +class LocalSymbol: + var: ir.AllocaInstr + ir_type: ir.Type + metadata: Any = None + + def __iter__(self): + yield self.var + yield self.ir_type + yield self.metadata + + +def _is_helper_call(call_node): + """Check if a call node is a BPF helper function call.""" + if isinstance(call_node.func, ast.Name): + # Exclude print from requiring temps (handles f-strings differently) + func_name = call_node.func.id + return HelperHandlerRegistry.has_handler(func_name) and func_name != "print" + + elif isinstance(call_node.func, ast.Attribute): + return HelperHandlerRegistry.has_handler(call_node.func.attr) + + return False + + +def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): + """Handle memory allocation for assignment statements.""" + + # Validate assignment + if len(stmt.targets) != 1: + logger.warning("Multi-target assignment not supported, skipping allocation") + return + + target = stmt.targets[0] + + # Skip non-name targets (e.g., struct field assignments) + if isinstance(target, ast.Attribute): + logger.debug(f"Struct field assignment to {target.attr}, no allocation needed") + return + + if not isinstance(target, ast.Name): + logger.warning(f"Unsupported assignment target type: {type(target).__name__}") + return + + var_name = target.id + rval = stmt.value + + # Skip if already allocated + if var_name in local_sym_tab: + logger.debug(f"Variable {var_name} already allocated, skipping") + return + + # Determine type and allocate based on rval + if isinstance(rval, ast.Call): + _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) + elif isinstance(rval, 
ast.Constant): + _allocate_for_constant(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.BinOp): + _allocate_for_binop(builder, var_name, local_sym_tab) + else: + logger.warning( + f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" + ) + + +def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): + """Allocate memory for variable assigned from a call.""" + + if isinstance(rval.func, ast.Name): + call_type = rval.func.id + + # C type constructors + if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"): + ir_type = ctypes_to_ir(call_type) + var = builder.alloca(ir_type, name=var_name) + var.align = ir_type.width // 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as {call_type}") + + # Helper functions + elif HelperHandlerRegistry.has_handler(call_type): + ir_type = ir.IntType(64) # Assume i64 return type + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for helper {call_type}") + + # Deref function + elif call_type == "deref": + ir_type = ir.IntType(64) # Assume i64 return type + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for deref") + + # Struct constructors + elif call_type in structs_sym_tab: + struct_info = structs_sym_tab[call_type] + var = builder.alloca(struct_info.ir_type, name=var_name) + local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) + logger.info(f"Pre-allocated {var_name} for struct {call_type}") + + else: + logger.warning(f"Unknown call type for allocation: {call_type}") + + elif isinstance(rval.func, ast.Attribute): + # Map method calls - need double allocation for ptr handling + _allocate_for_map_method(builder, var_name, local_sym_tab) + + else: + logger.warning(f"Unsupported call 
function type for {var_name}") + + +def _allocate_for_map_method(builder, var_name, local_sym_tab): + """Allocate memory for variable assigned from map method (double alloc).""" + + # Main variable (pointer to pointer) + ir_type = ir.PointerType(ir.IntType(64)) + var = builder.alloca(ir_type, name=var_name) + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + + # Temporary variable for computed values + tmp_ir_type = ir.IntType(64) + var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp") + local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) + + logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method") + + +def _allocate_for_constant(builder, var_name, rval, local_sym_tab): + """Allocate memory for variable assigned from a constant.""" + + if isinstance(rval.value, bool): + ir_type = ir.IntType(1) + var = builder.alloca(ir_type, name=var_name) + var.align = 1 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as bool") + + elif isinstance(rval.value, int): + ir_type = ir.IntType(64) + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as i64") + + elif isinstance(rval.value, str): + ir_type = ir.PointerType(ir.IntType(8)) + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} as string") + + else: + logger.warning( + f"Unsupported constant type for {var_name}: {type(rval.value).__name__}" + ) + + +def _allocate_for_binop(builder, var_name, local_sym_tab): + """Allocate memory for variable assigned from a binary operation.""" + ir_type = ir.IntType(64) # Assume i64 result + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ir_type) + logger.info(f"Pre-allocated {var_name} for binop result") + + +def 
allocate_temp_pool(builder, max_temps, local_sym_tab): + """Allocate the temporary scratch space pool for helper arguments.""" + if max_temps == 0: + return + + logger.info(f"Allocating temp pool of {max_temps} variables") + for i in range(max_temps): + temp_name = f"__helper_temp_{i}" + temp_var = builder.alloca(ir.IntType(64), name=temp_name) + temp_var.align = 8 + local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64)) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index a024ca5c..45d7b0ae 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -1,8 +1,6 @@ from llvmlite import ir import ast import logging -from typing import Any -from dataclasses import dataclass from pythonbpf.helper import ( HelperHandlerRegistry, @@ -14,6 +12,7 @@ handle_variable_assignment, handle_struct_field_assignment, ) +from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name @@ -21,18 +20,6 @@ logger = logging.getLogger(__name__) -@dataclass -class LocalSymbol: - var: ir.AllocaInstr - ir_type: ir.Type - metadata: Any = None - - def __iter__(self): - yield self.var - yield self.ir_type - yield self.metadata - - def get_probe_string(func_node): """Extract the probe string from the decorator of the function node.""" # TODO: right now we have the whole string in the section decorator @@ -220,20 +207,7 @@ def process_stmt( return did_return -def _is_helper_call(call_node): - """Check if a call node is a BPF helper function call.""" - if isinstance(call_node.func, ast.Name): - # Exclude print from requiring temps (handles f-strings differently) - func_name = call_node.func.id - return HelperHandlerRegistry.has_handler(func_name) and func_name != "print" - - elif isinstance(call_node.func, ast.Attribute): - return HelperHandlerRegistry.has_handler(call_node.func.attr) - - return False - - -def 
_handle_if_allocation( +def handle_if_allocation( module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): """Recursively handle allocations in if/else branches.""" @@ -261,162 +235,6 @@ def _handle_if_allocation( ) -def _handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): - """Handle memory allocation for assignment statements.""" - - # Validate assignment - if len(stmt.targets) != 1: - logger.warning("Multi-target assignment not supported, skipping allocation") - return - - target = stmt.targets[0] - - # Skip non-name targets (e.g., struct field assignments) - if isinstance(target, ast.Attribute): - logger.debug(f"Struct field assignment to {target.attr}, no allocation needed") - return - - if not isinstance(target, ast.Name): - logger.warning(f"Unsupported assignment target type: {type(target).__name__}") - return - - var_name = target.id - rval = stmt.value - - # Skip if already allocated - if var_name in local_sym_tab: - logger.debug(f"Variable {var_name} already allocated, skipping") - return - - # Determine type and allocate based on rval - if isinstance(rval, ast.Call): - _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) - elif isinstance(rval, ast.Constant): - _allocate_for_constant(builder, var_name, rval, local_sym_tab) - elif isinstance(rval, ast.BinOp): - _allocate_for_binop(builder, var_name, local_sym_tab) - else: - logger.warning( - f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" - ) - - -def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): - """Allocate memory for variable assigned from a call.""" - - if isinstance(rval.func, ast.Name): - call_type = rval.func.id - - # C type constructors - if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"): - ir_type = ctypes_to_ir(call_type) - var = builder.alloca(ir_type, name=var_name) - var.align = ir_type.width // 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) 
- logger.info(f"Pre-allocated {var_name} as {call_type}") - - # Helper functions - elif HelperHandlerRegistry.has_handler(call_type): - ir_type = ir.IntType(64) # Assume i64 return type - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} for helper {call_type}") - - # Deref function - elif call_type == "deref": - ir_type = ir.IntType(64) # Assume i64 return type - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} for deref") - - # Struct constructors - elif call_type in structs_sym_tab: - struct_info = structs_sym_tab[call_type] - var = builder.alloca(struct_info.ir_type, name=var_name) - local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) - logger.info(f"Pre-allocated {var_name} for struct {call_type}") - - else: - logger.warning(f"Unknown call type for allocation: {call_type}") - - elif isinstance(rval.func, ast.Attribute): - # Map method calls - need double allocation for ptr handling - _allocate_for_map_method(builder, var_name, local_sym_tab) - - else: - logger.warning(f"Unsupported call function type for {var_name}") - - -def _allocate_for_map_method(builder, var_name, local_sym_tab): - """Allocate memory for variable assigned from map method (double alloc).""" - - # Main variable (pointer to pointer) - ir_type = ir.PointerType(ir.IntType(64)) - var = builder.alloca(ir_type, name=var_name) - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - - # Temporary variable for computed values - tmp_ir_type = ir.IntType(64) - var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp") - local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type) - - logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method") - - -def _allocate_for_constant(builder, var_name, rval, local_sym_tab): - """Allocate memory for 
variable assigned from a constant.""" - - if isinstance(rval.value, bool): - ir_type = ir.IntType(1) - var = builder.alloca(ir_type, name=var_name) - var.align = 1 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} as bool") - - elif isinstance(rval.value, int): - ir_type = ir.IntType(64) - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} as i64") - - elif isinstance(rval.value, str): - ir_type = ir.PointerType(ir.IntType(8)) - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} as string") - - else: - logger.warning( - f"Unsupported constant type for {var_name}: {type(rval.value).__name__}" - ) - - -def _allocate_for_binop(builder, var_name, local_sym_tab): - """Allocate memory for variable assigned from a binary operation.""" - ir_type = ir.IntType(64) # Assume i64 result - var = builder.alloca(ir_type, name=var_name) - var.align = 8 - local_sym_tab[var_name] = LocalSymbol(var, ir_type) - logger.info(f"Pre-allocated {var_name} for binop result") - - -def _allocate_temp_pool(builder, max_temps, local_sym_tab): - """Allocate the temporary scratch space pool for helper arguments.""" - if max_temps == 0: - return - - logger.info(f"Allocating temp pool of {max_temps} variables") - for i in range(max_temps): - temp_name = f"__helper_temp_{i}" - temp_var = builder.alloca(ir.IntType(64), name=temp_name) - temp_var.align = 8 - local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64)) - - def count_temps_in_call(call_node, local_sym_tab): """Count the number of temporary variables needed for a function call.""" @@ -475,7 +293,7 @@ def update_max_temps_for_stmt(stmt): # Handle allocations if isinstance(stmt, ast.If): - _handle_if_allocation( + handle_if_allocation( module, builder, stmt, @@ -486,9 +304,9 @@ def 
update_max_temps_for_stmt(stmt): structs_sym_tab, ) elif isinstance(stmt, ast.Assign): - _handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab) + handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab) - _allocate_temp_pool(builder, max_temps_needed, local_sym_tab) + allocate_temp_pool(builder, max_temps_needed, local_sym_tab) return local_sym_tab diff --git a/pythonbpf/local_symbol.py b/pythonbpf/local_symbol.py deleted file mode 100644 index d0f76d5f..00000000 --- a/pythonbpf/local_symbol.py +++ /dev/null @@ -1,15 +0,0 @@ -from llvm import ir -from dataclasses import dataclass -from typing import Any - - -@dataclass -class LocalSymbol: - var: ir.AllocaInstr - ir_type: ir.Type - metadata: Any = None - - def __iter__(self): - yield self.var - yield self.ir_type - yield self.metadata