diff --git a/examples/execve5.py b/examples/execve5.py index f716420..32063ff 100644 --- a/examples/execve5.py +++ b/examples/execve5.py @@ -10,6 +10,7 @@ class data_t: pid: c_uint64 ts: c_uint64 + comm: str(16) @bpf @@ -21,13 +22,14 @@ def events() -> PerfEventArray: @bpf @section("tracepoint/syscalls/sys_enter_clone") def hello(ctx: c_void_p) -> c_int32: - strobj = "Hi" dataobj = data_t() ts = ktime() process_id = pid() + strobj = "hellohellohello" dataobj.pid = process_id dataobj.ts = ts - print(f"clone called at {ts} by pid {process_id}") + # dataobj.comm = strobj + print(f"clone called at {ts} by pid {process_id}, comm {strobj}") events.output(dataobj) return c_int32(0) diff --git a/pythonbpf/bpf_helper_handler.py b/pythonbpf/bpf_helper_handler.py index 91dd81e..34b1f04 100644 --- a/pythonbpf/bpf_helper_handler.py +++ b/pythonbpf/bpf_helper_handler.py @@ -13,7 +13,7 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab fn_ptr_type = ir.PointerType(fn_type) fn_ptr = builder.inttoptr(helper_id, fn_ptr_type) result = builder.call(fn_ptr, [], tail=False) - return result + return result, ir.IntType(64) def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -60,7 +60,7 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_ result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False) - return result + return result, ir.PointerType() def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None): @@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, exprs = [] for value in call.args[0].values: + print("Value in f-string:", ast.dump(value)) if isinstance(value, ast.Constant): if isinstance(value.value, str): fmt_parts.append(value.value) @@ -86,10 +87,24 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, 
local_sym_tab=None, "Only string and integer constants are supported in f-string.") elif isinstance(value, ast.FormattedValue): print("Formatted value:", ast.dump(value)) - # Assume int for now - fmt_parts.append("%lld") + # TODO: Dirty handling here, only checks for int or str if isinstance(value.value, ast.Name): - exprs.append(value.value) + if local_sym_tab and value.value.id in local_sym_tab: + var_ptr, var_type = local_sym_tab[value.value.id] + if isinstance(var_type, ir.IntType): + fmt_parts.append("%lld") + exprs.append(value.value) + elif var_type == ir.PointerType(ir.IntType(8)): + # Case with string + fmt_parts.append("%s") + exprs.append(value.value) + else: + raise NotImplementedError( + "Only integer and pointer types are supported in formatted values.") + print("Formatted value variable:", var_ptr, var_type) + else: + raise ValueError( + f"Variable {value.value.id} not found in local symbol table.") else: raise NotImplementedError( "Only simple variable names are supported in formatted values.") @@ -121,7 +136,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, "Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.") for expr in exprs[:3]: - val = eval_expr(func, module, builder, expr, local_sym_tab, None) + val, _ = eval_expr(func, module, builder, + expr, local_sym_tab, None) if val: if isinstance(val.type, ir.PointerType): val = builder.ptrtoint(val, ir.IntType(64)) @@ -137,7 +153,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, print( "Warning: Failed to evaluate expression for bpf_printk argument. 
It will be converted to 0.") args.append(ir.Constant(ir.IntType(64), 0)) - fn_type = ir.FunctionType(ir.IntType( 64), [ir.PointerType(), ir.IntType(32)], var_arg=True) fn_ptr_type = ir.PointerType(fn_type) @@ -266,7 +281,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_ result = builder.call( fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False) - return result + return result, None def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -321,7 +336,7 @@ def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_ # Call the helper function result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False) - return result + return result, None def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None): @@ -338,7 +353,7 @@ def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local # Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF) pid = builder.and_(result, mask) - return pid + return pid, ir.IntType(64) def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None): @@ -387,7 +402,7 @@ def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sy result = builder.call( fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False) - return result + return result, None else: raise NotImplementedError( "Only simple object names are supported as data in perf event output.") diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index df30cde..52a8044 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -93,6 +93,7 @@ def compile_to_ir(filename: str, output: str): module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"]) + print(f"IR 
written to {output}") with open(output, "w") as f: f.write(f"source_filename = \"{filename}\"\n") f.write(str(module)) @@ -118,6 +119,7 @@ def compile(): print(f"Object written to {o_file}, {ll_file} can be removed") + def BPF() -> BpfProgram: caller_frame = inspect.stack()[1] caller_file = Path(caller_frame.filename).resolve() @@ -129,5 +131,5 @@ def BPF() -> BpfProgram: "llc", "-march=bpf", "-filetype=obj", "-O2", str(ll_file), "-o", str(o_file) ], check=True) - + return BpfProgram(str(o_file)) diff --git a/pythonbpf/expr_pass.py b/pythonbpf/expr_pass.py index cc969a5..22641bf 100644 --- a/pythonbpf/expr_pass.py +++ b/pythonbpf/expr_pass.py @@ -3,20 +3,20 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None): - print(f"Evaluating expression: {expr}") + print(f"Evaluating expression: {ast.dump(expr)}") if isinstance(expr, ast.Name): if expr.id in local_sym_tab: var = local_sym_tab[expr.id][0] val = builder.load(var) - return val + return val, local_sym_tab[expr.id][1] # return value and type else: print(f"Undefined variable {expr.id}") return None elif isinstance(expr, ast.Constant): if isinstance(expr.value, int): - return ir.Constant(ir.IntType(64), expr.value) + return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64) elif isinstance(expr.value, bool): - return ir.Constant(ir.IntType(1), int(expr.value)) + return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1) else: print("Unsupported constant type") return None @@ -44,8 +44,9 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s if arg is None: print("Failed to evaluate deref argument") return None + # Since we are handling only name case, directly take type from sym tab val = builder.load(arg) - return val + return val, local_sym_tab[expr.args[0].id][1] # check for helpers if expr.func.id in helper_func_list: diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index 
d7188f2..674617a 100644 --- a/pythonbpf/functions_pass.py +++ b/pythonbpf/functions_pass.py @@ -58,10 +58,18 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc inbounds=True) val = eval_expr(func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab) + if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)): + # TODO: Figure it out, not a priority rn + # Special case for string assignment to char array + #str_len = struct_info["field_types"][field_idx].count + #assign_string_to_array(builder, field_ptr, val[0], str_len) + #print(f"Assigned to struct field {var_name}.{field_name}") + pass if val is None: print("Failed to evaluate struct field assignment") return - builder.store(val, field_ptr) + print(field_ptr) + builder.store(val[0], field_ptr) print(f"Assigned to struct field {var_name}.{field_name}") return elif isinstance(rval, ast.Constant): @@ -114,7 +122,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc # var.align = 8 val = handle_helper_call( rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata) - builder.store(val, local_sym_tab[var_name][0]) + builder.store(val[0], local_sym_tab[var_name][0]) # local_sym_tab[var_name] = var print(f"Assigned constant {rval.func.id} to {var_name}") elif call_type == "deref" and len(rval.args) == 1: @@ -125,7 +133,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc print("Failed to evaluate deref argument") return print(f"Dereferenced value: {val}, storing in {var_name}") - builder.store(val, local_sym_tab[var_name][0]) + builder.store(val[0], local_sym_tab[var_name][0]) # local_sym_tab[var_name] = var print(f"Dereferenced and assigned to {var_name}") elif call_type in structs_sym_tab and len(rval.args) == 0: @@ -155,7 +163,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc rval, 
# For string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
    """
    Emit IR that copies a string (i8*) into a fixed-size array ([N x i8]*).

    At most ``array_length - 1`` bytes are read from the source. Reading
    stops at the first NUL byte; every remaining slot of the target is then
    filled with zeros, and the final slot is always set to NUL. The target
    is therefore fully written and NUL-terminated, and the source is never
    read past its own terminator.

    Args:
        builder: IR builder positioned where the copy should be emitted.
        target_array_ptr: Pointer to the destination array; indexed as
            ``[0, i]`` (assumes it points at an ``[N x i8]`` -- TODO confirm
            against the struct field GEP at the call site).
        source_string_ptr: Pointer to the source bytes; indexed as ``[i]``.
        array_length: Python int, number of elements in the target array.

    Returns:
        None. On return the builder is positioned at the end of the
        "copy_end" block so the caller can keep emitting instructions.
    """
    if array_length <= 0:
        # Nothing to write into; also avoids a GEP at index -1 below.
        return

    i32 = ir.IntType(32)
    zero = ir.Constant(i32, 0)
    one = ir.Constant(i32, 1)
    last_idx = ir.Constant(i32, array_length - 1)
    nul = ir.Constant(ir.IntType(8), 0)

    # Each basic block may hold exactly one terminator, so the loop test and
    # the loop body live in separate blocks (the previous version emitted a
    # cbranch and then an if_then into the same block, and its back edge
    # skipped the body entirely).
    copy_cond = builder.append_basic_block("copy_cond")
    copy_body = builder.append_basic_block("copy_char")
    pad_cond = builder.append_basic_block("pad_cond")
    pad_body = builder.append_basic_block("pad_zero")
    end_block = builder.append_basic_block("copy_end")

    # Loop counter, shared by the copy loop and the padding loop.
    idx_slot = builder.alloca(i32)
    builder.store(zero, idx_slot)
    builder.branch(copy_cond)

    # Copy loop test: continue while idx < array_length - 1.
    builder.position_at_end(copy_cond)
    idx = builder.load(idx_slot)
    in_bounds = builder.icmp_unsigned('<', idx, last_idx)
    builder.cbranch(in_bounds, copy_body, end_block)

    # Copy loop body: move one byte, advance, and leave the loop on NUL.
    builder.position_at_end(copy_body)
    src_ptr = builder.gep(source_string_ptr, [idx])
    char = builder.load(src_ptr)
    dst_ptr = builder.gep(target_array_ptr, [zero, idx])
    builder.store(char, dst_ptr)
    builder.store(builder.add(idx, one), idx_slot)
    hit_nul = builder.icmp_unsigned('==', char, nul)
    builder.cbranch(hit_nul, pad_cond, copy_cond)

    # Padding loop test: zero the slots after the source's terminator.
    builder.position_at_end(pad_cond)
    pad_idx = builder.load(idx_slot)
    pad_in_bounds = builder.icmp_unsigned('<', pad_idx, last_idx)
    builder.cbranch(pad_in_bounds, pad_body, end_block)

    # Padding loop body.
    builder.position_at_end(pad_body)
    pad_ptr = builder.gep(target_array_ptr, [zero, pad_idx])
    builder.store(nul, pad_ptr)
    builder.store(builder.add(pad_idx, one), idx_slot)
    builder.branch(pad_cond)

    builder.position_at_end(end_block)

    # Ensure null termination even when the source filled the whole array.
    null_ptr = builder.gep(target_array_ptr, [zero, last_idx])
    builder.store(nul, null_ptr)
{name: idx for idx, name in enumerate(field_names)}, - "size": total_size + "size": total_size, + "field_types": field_types, } print(f"Created struct {struct_name} with fields {field_names}")