diff --git a/arch/aarch64/mcount-insn.c b/arch/aarch64/mcount-insn.c index 13488487b..7e0a2dc8a 100644 --- a/arch/aarch64/mcount-insn.c +++ b/arch/aarch64/mcount-insn.c @@ -8,10 +8,19 @@ #include #include +/** + * mcount_disasm_init - initialize the capstone engine once + * @disasm - mcount disassembly engine + */ void mcount_disasm_init(struct mcount_disasm_engine *disasm) { + if (disasm->engine) + return; + + pr_dbg2("initialize disassembly engine\n"); + if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &disasm->engine) != CS_ERR_OK) { - pr_dbg("failed to init Capstone disasm engine\n"); + pr_dbg("failed to init capstone disasm engine\n"); return; } diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index a14642287..a831c6eb1 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -1,7 +1,15 @@ +#include +#include #include #include #include +#if HAVE_MEMBARRIER +#include +#else +#include +#endif + /* This should be defined before #include "utils.h" */ #define PR_FMT "dynamic" #define PR_DOMAIN DBG_DYNAMIC @@ -9,6 +17,7 @@ #include "libmcount/dynamic.h" #include "libmcount/internal.h" #include "mcount-arch.h" +#include "utils/hashmap.h" #include "utils/symbol.h" #include "utils/utils.h" @@ -19,6 +28,429 @@ static const unsigned char patchable_clang_nop[] = { 0x0f, 0x1f, 0x44, 0x00, 0x0 static const unsigned char endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; +extern bool mcount_target_running; + +/* + * Hashmap of trampoline locations for every installed int3 trap. The traps are + * used when patching and unpatching functions: they prevent threads from + * entering the critical zone. When a trap is reached, the trap handler fetches + * the trampoline location from the hmap, and emulates a call to the trampoline, + * to mimic regular instrumentation. 
+ * + * (void *int3_location -> void *trampoline) + */ +static struct Hashmap *int3_hmap; + +/* Hashmap of the addresses of instructions that are in patching zones and need + * to be executed out of line. The addresses are mapped to their out of line + * equivalent. + * + * (void *critical_insn -> void *out_of_line_insn) + */ +static struct Hashmap *patch_region_hmap; + +/* Realtime signal number to instruct running threads to move out of patching + regions */ +static int sig_clear_patch_region; + +/* counter for the threads that are guaranteed to be out of registered patching + regions when a signal is issued */ +static sem_t sem_clear_patch_region; + +/** + * register_trap - save trampoline associated to a trap + * @trap - trap address + * @call_loc - address of the symbol to emulate a call to + * @return - -1 on error, 0 on success + */ +static int register_trap(void *trap, void *call_loc) +{ + if (!hashmap_put(int3_hmap, trap, call_loc)) + return -1; + + return 0; +} + +/** + * unregister_trap - remove trap entry from hmap + * @trap - trap address + * @return - -1 on error, 0 on success + */ +static int unregister_trap(void *trap) +{ + if (hashmap_remove(int3_hmap, trap)) + return 0; + + return -1; +} + +/** + * emulate_trampoline_call - SIGTRAP handler, emulates a call to the trampoline + * associated with the trap location + * + * When the trap handler is executed, it changes the program counter to point to + * . When the trap handler exits, the code at will + * execute (which is __dentry__ defined in dynamic.S). + * + * As __dentry__ is expected to be called like a function, it depends on the + * address of the caller to know which tracepoint was executed. The address is + * expected to be found on the stack. Therefore, the trap handler actually needs + * to emulate a call instruction entirely (moving the instruction pointer is not + * enough). 
+ * + * To do so, the trap handler will also push on the stack the next instruction + * pointer that would be used if the executed instruction was a call instead of + * a trap. + * + * @sig - signal caught + * @info - (unused) + * @ucontext - user context, containing registers + */ +static void emulate_trampoline_call(int sig, siginfo_t *info, void *ucontext) +{ + ucontext_t *uctx = ucontext; + mcontext_t *mctx = &uctx->uc_mcontext; + void *int3_addr; + unsigned long trampoline; + unsigned long child_addr; /* probe location for mcount_entry */ + + ASSERT(sig == SIGTRAP); + + __atomic_signal_fence(__ATOMIC_SEQ_CST); + int3_addr = (void *)mctx->gregs[REG_RIP] - 1; /* int3 size = 1 */ + trampoline = (unsigned long)hashmap_get(int3_hmap, int3_addr); + + child_addr = (unsigned long)int3_addr + CALL_INSN_SIZE; + + mctx->gregs[REG_RSP] -= 8; + memcpy((void *)mctx->gregs[REG_RSP], &child_addr, 8); + mctx->gregs[REG_RIP] = trampoline; +} + +/** + * configure_sigtrap_handler - emulate call to trampoline on SIGTRAP + * @return - -1 on failure, 0 on success + */ +static int configure_sigtrap_handler() +{ + struct sigaction act; + + act.sa_sigaction = emulate_trampoline_call; + act.sa_flags = SA_SIGINFO; + + if (sigaction(SIGTRAP, &act, NULL) < 0) { + pr_err("failed to configure SIGTRAP handler\n"); + return -1; + } + + pr_dbg2("configured SIGTRAP handler\n"); + return 0; +} + +#if HAVE_MEMBARRIER + +/** + * setup_synchronization_mechanism - register intent to use the 'private + * expedited sync core' membarrier to synchronize instruction pipelines and + * caches across cores, for safe cross-modification. 
+ * @return - negative on error, 0 on success + */ +static int setup_synchronization_mechanism() +{ + int ret = + syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + if (ret < 0) + pr_dbg2("failed to register membarrier intent: %s\n", strerror(errno)); + return ret; +} + +/** + * synchronize_all_cores - use membarrier to perform cache and pipeline + * synchronization across cores executing cross-modified code + * @return - negative on error, 0 on success + */ +static int synchronize_all_cores() +{ + int ret = syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + if (ret < 0) + pr_dbg2("failed to use membarrier: %s\n", strerror(errno)); + return ret; +} + +#else /* HAVE_MEMBARRIER */ + +/* signal used to perform cache and pipeline synchronization across cores */ +static int sig_sync_cores; + +/* counter for the threads that have performed serialization when a signal is + issued */ +static sem_t sem_sync_cores; + +/** + * serialize_instruction_execution - execute core serialize instruction + * + * According to Intel manual, CPUID is a serializing instruction. 
+ */ +static void serialize_instruction_execution() +{ + int _; + __cpuid(_, _, _, _, _); + sem_post(&sem_sync_cores); +} + +/** + * setup_synchronization_mechanism - setup real-time signal and its handler to + * perform core synchronization across all threads + * @return - 0 on success, -1 on failure + */ +static int setup_synchronization_mechanism() +{ + struct sigaction act; + + if (sig_sync_cores > 0) + return 0; + + sig_sync_cores = find_unused_sigrt(); + if (sig_sync_cores == -1) + return -1; + + sem_init(&sem_sync_cores, 0, 0); + + act.sa_sigaction = serialize_instruction_execution; + act.sa_flags = 0; + + if (sigaction(sig_sync_cores, &act, NULL) < 0) { + pr_dbg("failed to configure core synchronization signal handler\n"); + return -1; + } + + pr_dbg("configured core synchronization signal (SIGRT%d) handler\n", sig_sync_cores); + return 0; +} + +/** + * serialize_instruction_cache - send RT signals to perform cache and pipeline + * synchronization across cores executing cross-modified code. 
+ * @return - -1 on error, 0 on success + */ +static int synchronize_all_cores() +{ + int signal_count; + int sync_count = 0; + struct timespec ts; + + ASSERT(sig_sync_cores >= SIGRTMIN); + + signal_count = thread_broadcast_signal(sig_sync_cores); + + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + return -1; + ts.tv_sec += 1; + + for (int i = 0; i < signal_count; i++) { + if (sem_timedwait(&sem_sync_cores, &ts) == -1) { + if (errno == EINTR) + i--; + else + pr_dbg3("error syncing with signal handler: %s\n", strerror(errno)); + } + else + sync_count++; + } + pr_dbg3("synced core in %d/%d thread(s)\n", sync_count, signal_count); + + return 0; +} + +#endif /* HAVE_MEMBARRIER */ + +/** + * register_patch_region - mark a memory region as critical by registering the + * addresses that it contains in 'patch_region_hmap' + * @start - address of the patch region + * @len - length of the patch region + * @return - -1 on error, 0 on success + */ +static int register_patch_region(void *start, int len) +{ + void *out_of_line_buffer = mcount_find_code((unsigned long)start + CALL_INSN_SIZE); + if (!out_of_line_buffer) + return -1; + + for (int i = 0; i < len; i++) { + if (!hashmap_put(patch_region_hmap, start + i, out_of_line_buffer + i)) + return -1; + } + + return 0; +} + +/** + * unregister_patch_region - unmark a memory region as critical + * @start - address of the patch region + * @len - length of the patch region + * @return - -1 on error, 0 on success + */ +static int unregister_patch_region(void *start, int len) +{ + void *out_of_line_buffer = mcount_find_code((unsigned long)start); + if (!out_of_line_buffer) + return -1; + + for (int i = 0; i < len; i++) { + if (!hashmap_remove(patch_region_hmap, start + i)) + return -1; + } + + return 0; +} + +/** + * leave_patch_region - signal handler on which a thread executes out of line if + * it happens to be in a registered patching region + * @sig - signal number + * @info - signal info (unused) + * @ucontext - user context + 
*/ +static void leave_patch_region(int sig, siginfo_t *info, void *ucontext) +{ + ucontext_t *uctx = ucontext; + mcontext_t *mctx = &uctx->uc_mcontext; + void *next_insn; + void *out_of_line_insn; + (void)sig; + + next_insn = (void *)mctx->gregs[REG_RIP]; + out_of_line_insn = hashmap_get(patch_region_hmap, next_insn); + if (out_of_line_insn) + mctx->gregs[REG_RIP] = (uint64_t)out_of_line_insn; + + sem_post(&sem_clear_patch_region); +} + +/** + * clear_patch_regions - move threads that are in a patching region out of line + * @return - 0 + */ +static int clear_patch_regions() +{ + int signal_count; + int move_count = 0; + struct timespec ts; + + ASSERT(sig_clear_patch_region >= SIGRTMIN); + + signal_count = thread_broadcast_signal(sig_clear_patch_region); + + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + return -1; + ts.tv_sec += 1; + + for (int i = 0; i < signal_count; i++) { + if (sem_timedwait(&sem_clear_patch_region, &ts) == -1) { + if (errno == EINTR) + i--; + else + pr_dbg3("error syncing with signal handler: %s\n", strerror(errno)); + } + else + move_count++; + } + pr_dbg3("checked ip of %d/%d thread(s)\n", move_count, signal_count); + + return 0; +} + +/** + * setup_clear_patch_region - initialize data structures and signals used to + * move threads of patching regions + * @return - -1 on error, 0 on success + */ +int setup_clear_patch_region() +{ + struct sigaction act; + + if (!patch_region_hmap) { + patch_region_hmap = hashmap_create(4, hashmap_ptr_hash, hashmap_ptr_equals); + if (!patch_region_hmap) { + pr_dbg("failed to create patch region hashmap\n"); + return -1; + } + } + + if (sig_clear_patch_region > 0) + return 0; + + sig_clear_patch_region = find_unused_sigrt(); + if (sig_clear_patch_region == -1) + return -1; + + sem_init(&sem_clear_patch_region, 0, 0); + + act.sa_sigaction = leave_patch_region; + act.sa_flags = 0; + + if (sigaction(sig_clear_patch_region, &act, NULL) < 0) { + pr_dbg("failed to configure clear signal (SIGRT%d) handler\n", + 
sig_clear_patch_region); + return -1; + } + + pr_dbg("configured clear signal (SIGRT%d) handler\n", sig_clear_patch_region); + return 0; +} + +/* These two lists are used to store functions that need to be optimized or + * cleaned up later in the code. In both case, a SIGRTMIN+n must be send. By + * optimizing or cleaning all of them up at the same time, we only need to send + * one signal per thread. + */ +LIST_HEAD(normal_funcs_patch); +LIST_HEAD(normal_funcs_unpatch); + +struct patch_dynamic_info { + struct list_head list; + struct mcount_dynamic_info *mdi; + struct mcount_disasm_info *info; +}; + +static void commit_normal_func(struct list_head *list, struct mcount_dynamic_info *mdi, + struct mcount_disasm_info *info) +{ + struct patch_dynamic_info *pdi; + pdi = xmalloc(sizeof(*pdi)); + + pdi->mdi = mdi; + pdi->info = info; + INIT_LIST_HEAD(&pdi->list); + + list_add(&pdi->list, list); +} + +/** + * mcount_arch_dynamic_init - initialize arch-specific data structures to + * perform runtime dynamic instrumentation + */ +int mcount_arch_dynamic_init() +{ + if (!int3_hmap) { + int3_hmap = hashmap_create(4, hashmap_ptr_hash, hashmap_ptr_equals); + if (!int3_hmap) { + pr_dbg("failed to create int3 hashmap\n"); + return -1; + } + } + if (configure_sigtrap_handler() < 0) + return -1; + + if (setup_synchronization_mechanism() < 0) + return -1; + + setup_clear_patch_region(); + + return 0; +} + int mcount_setup_trampoline(struct mcount_dynamic_info *mdi) { unsigned char trampoline[] = { 0x3e, 0xff, 0x25, 0x01, 0x00, 0x00, 0x00, 0xcc }; @@ -247,9 +679,15 @@ void mcount_arch_find_module(struct mcount_dynamic_info *mdi, struct uftrace_sym elf_finish(&elf); } -static unsigned long get_target_addr(struct mcount_dynamic_info *mdi, unsigned long addr) +/** + * get_trampoline_offest - compute the relative address of the trampoline + * @mdi - mcount dynamic info + * @origin - origin address + * @return - distance to the trampoline + */ +static unsigned long 
get_trampoline_offset(struct mcount_dynamic_info *mdi, unsigned long origin) { - return mdi->trampoline - (addr + CALL_INSN_SIZE); + return mdi->trampoline - (origin + CALL_INSN_SIZE); } static int patch_fentry_code(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) @@ -271,7 +709,7 @@ static int patch_fentry_code(struct mcount_dynamic_info *mdi, struct uftrace_sym } /* get the jump offset to the trampoline */ - target_addr = get_target_addr(mdi, (unsigned long)insn); + target_addr = get_trampoline_offset(mdi, (unsigned long)insn); if (target_addr == 0) return INSTRUMENT_SKIPPED; @@ -380,97 +818,140 @@ static int patch_xray_func(struct mcount_dynamic_info *mdi, struct uftrace_symbo return ret; } -/* - * we overwrite instructions over 5bytes from start of function - * to call '__dentry__' that seems similar like '__fentry__'. - * - * while overwriting, After adding the generated instruction which - * returns to the address of the original instruction end, - * save it in the heap. - * - * for example: - * - * 4005f0: 31 ed xor %ebp,%ebp - * 4005f2: 49 89 d1 mov %rdx,%r9 - * 4005f5: 5e pop %rsi - * - * will changed like this : - * - * 4005f0 call qword ptr [rip + 0x200a0a] # 0x601000 - * - * and keeping original instruction : - * - * Original Instructions--------------- - * f1cff0: xor ebp, ebp - * f1cff2: mov r9, rdx - * f1cff5: pop rsi - * Generated Instruction to return----- - * f1cff6: jmp qword ptr [rip] - * f1cffc: QW 0x00000000004005f6 - * - * In the original case, address 0x601000 has a dynamic symbol - * start address. It is also the first element in the GOT array. - * while initializing the mcount library, we will replace it with - * the address of the function '__dentry__'. so, the changed - * instruction will be calling '__dentry__'. - * - * '__dentry__' has a similar function like '__fentry__'. - * the other thing is that it returns to original instructions - * we keeping. 
it makes it possible to execute the original - * instructions and return to the address at the end of the original - * instructions. Thus, the execution will goes on. - * +/** + * patch_region_lock - insert a trap at the beginning of a patching region so no + * new incoming thread will execute it, and register the region as critical for + * next step + * @mdi - mcount dynamic info + * @info - disassembly info for the patched symbol + * @return - INSTRUMENT_SUCCESS */ +static int patch_region_lock(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) +{ + /* + * Let's assume we have the following instructions. + * + * 0x0: push %rbp <= origin_code_addr + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + * + * The goal is to modify the instructions in order to get the following + * ones. + * + * 0x0: call + * 0x5: + * 0xb: + */ -/* - * Patch the instruction to the address as given for arguments. + void *origin_code_addr; + + origin_code_addr = (void *)info->addr; + if (info->has_intel_cet) + origin_code_addr += ENDBR_INSN_SIZE; + + /* The first step is to insert a 1-byte trap-based probe point (atomic). + * This will prevent threads to enter the critical zone while we patch it, + * so no core will see partial modifications. + * + * 0x0: int3 <= origin_code_addr + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + * + * The trap will emulate a call to the trampoline while in place. + */ + + if (register_trap(origin_code_addr, (void *)mdi->trampoline) == -1) + return INSTRUMENT_FAILED; + ((uint8_t *)origin_code_addr)[0] = 0xcc; + + /* The second step is to move any thread out of the critical zone if still + * present. Threads in the critical zone resume execution out of line, in + * their dedicated OLX region. + * + * The method used to move the threads is to signal all the threads, so they + * check if their instruction pointer is in the patching region. 
If so, they + * move their instruction pointer to the corresponding one in the OLX + * region. + */ + + if (register_patch_region(origin_code_addr, info->orig_size) == -1) + pr_dbg3("failed to register patch region\n"); + + return INSTRUMENT_SUCCESS; +} + +/** + * patch_code - patch a region of the code with the operand of the call + * instruction used to probe a function. The call opcode needs to be inserted + * later. + * @mdi - mcount dynamic info for the current module + * @info - disassembly info for the patched symbol */ static void patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) { + void *trampoline_rel_addr; void *origin_code_addr; - unsigned char call_insn[] = { 0xe8, 0x00, 0x00, 0x00, 0x00 }; - uint32_t target_addr = get_target_addr(mdi, info->addr); - /* patch address */ origin_code_addr = (void *)info->addr; - - if (info->has_intel_cet) { + if (info->has_intel_cet) origin_code_addr += ENDBR_INSN_SIZE; - target_addr = get_target_addr(mdi, info->addr + ENDBR_INSN_SIZE); - } - - /* build the instrumentation instruction */ - memcpy(&call_insn[1], &target_addr, CALL_INSN_SIZE - 1); + trampoline_rel_addr = (void *)get_trampoline_offset(mdi, (unsigned long)origin_code_addr); - /* - * we need 5-bytes at least to instrumentation. however, - * if instructions is not fit 5-bytes, we will overwrite the - * 5-bytes and fill the remaining part of the last - * instruction with nop. - * - * [example] - * In this example, we overwrite 9-bytes to use 5-bytes. + /* The third step is to write the target address of the call. From the + * processor view the 4-bytes address can be any garbage instructions. * - * dynamic: 0x19e98b0[01]:push rbp - * dynamic: 0x19e98b1[03]:mov rbp, rsp - * dynamic: 0x19e98b4[05]:mov edi, 0x4005f4 + * We fill the remaining part of the patching region with nops. 
* - * dynamic: 0x40054c[05]:call 0x400ff0 - * dynamic: 0x400551[01]:nop - * dynamic: 0x400552[01]:nop - * dynamic: 0x400553[01]:nop - * dynamic: 0x400554[01]:nop + * 0x0: int3 + * 0x1: + * 0x5: + * 0xb: */ - memcpy(origin_code_addr, call_insn, CALL_INSN_SIZE); + + memcpy(&((uint8_t *)origin_code_addr)[1], &trampoline_rel_addr, CALL_INSN_SIZE - 1); memset(origin_code_addr + CALL_INSN_SIZE, 0x90, /* NOP */ info->orig_size - CALL_INSN_SIZE); +} + +/** + * patch_region_unlock - unmark a region as critical and remove the trap that + * prevents execution. + * @info - disassembly info for the patched symbol + */ +static void patch_region_unlock(struct mcount_disasm_info *info) +{ + void *origin_code_addr; + + origin_code_addr = (void *)info->addr; + if (info->has_intel_cet) + origin_code_addr += ENDBR_INSN_SIZE; - /* flush icache so that cpu can execute the new insn */ - __builtin___clear_cache(origin_code_addr, origin_code_addr + info->orig_size); + if (unregister_patch_region(origin_code_addr, info->orig_size) == -1) + pr_dbg3("failed to unregister patch region\n"); + + /* + * The fourth and last step is to replace the trap with the call opcode. + * + * 0x0: call + * 0x5: + * 0xb: + */ + ((uint8_t *)origin_code_addr)[0] = 0xe8; } -static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +/** + * patch_normal_func_init - perform the initial steps of the patching process, + * awaiting for sanitization of the critical region. This step is batched with + * subsequent ones. 
+ * @mdi - mcount dynamic info for the current module + * @sym - symbol to patch + * @disasm - disassembly engine + * @return - instrumentation status + */ +static int patch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, + struct mcount_disasm_engine *disasm) { uint8_t jmp_insn[15] = { 0x3e, @@ -478,20 +959,22 @@ static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_sym 0x25, }; uint64_t jmp_target; - struct mcount_disasm_info info = { - .sym = sym, - .addr = mdi->map->start + sym->addr, - }; + struct mcount_disasm_info *info; unsigned call_offset = CALL_INSN_SIZE; int state; - state = disasm_check_insns(disasm, mdi, &info); + info = xmalloc(sizeof(*info)); + memset(info, 0, sizeof(*info)); + info->sym = sym; + info->addr = mdi->map->start + sym->addr; + + state = disasm_check_insns(disasm, mdi, info); if (state != INSTRUMENT_SUCCESS) { pr_dbg3(" >> %s: %s\n", state == INSTRUMENT_FAILED ? "FAIL" : "SKIP", sym->name); return state; } - pr_dbg2("force patch normal func: %s (patch size: %d)\n", sym->name, info.orig_size); + pr_dbg2("force patch normal func: %s (patch size: %d)\n", sym->name, info->orig_size); /* * stored origin instruction block: @@ -503,22 +986,231 @@ static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_sym * | [Return address] | * ---------------------- */ - jmp_target = info.addr + info.orig_size; - if (info.has_intel_cet) { + jmp_target = info->addr + info->orig_size; + if (info->has_intel_cet) { jmp_target += ENDBR_INSN_SIZE; call_offset += ENDBR_INSN_SIZE; } memcpy(jmp_insn + CET_JMP_INSN_SIZE, &jmp_target, sizeof(jmp_target)); - if (info.has_jump) - mcount_save_code(&info, call_offset, jmp_insn, 0); + if (info->has_jump) + mcount_save_code(info, call_offset, jmp_insn, 0); else - mcount_save_code(&info, call_offset, jmp_insn, sizeof(jmp_insn)); + mcount_save_code(info, call_offset, jmp_insn, sizeof(jmp_insn)); - patch_code(mdi, &info); + if (mcount_target_running) { + 
state = patch_region_lock(mdi, info);
+		commit_normal_func(&normal_funcs_patch, mdi, info);
+	}
+	else {
+		patch_code(mdi, info);
+		patch_region_unlock(info);
+	}
 
-	return INSTRUMENT_SUCCESS;
+	return state;
+}
+
+/**
+ * mcount_patch_normal_func_fini - perform the final step of the patching process and cleanup
+ * @return - 0
+ */
+void mcount_patch_normal_func_fini()
+{
+	struct patch_dynamic_info *pdi, *tmp;
+
+	if (!mcount_target_running)
+		return;
+
+	if (list_empty(&normal_funcs_patch))
+		return;
+
+	/* We ensure that every core sees the trap before patching the critical
+	 * zone, by synchronizing them.
+	 */
+	synchronize_all_cores();
+	clear_patch_regions();
+
+	list_for_each_entry_safe(pdi, tmp, &normal_funcs_patch, list) {
+		patch_code(pdi->mdi, pdi->info);
+		write_memory_barrier();
+		patch_region_unlock(pdi->info);
+	}
+
+	synchronize_all_cores();
+
+	list_for_each_entry_safe(pdi, tmp, &normal_funcs_patch, list) {
+		void *origin_code_addr;
+
+		origin_code_addr = (void *)pdi->info->addr;
+		if (pdi->info->has_intel_cet)
+			origin_code_addr += ENDBR_INSN_SIZE;
+
+		unregister_trap(origin_code_addr);
+		list_del(&pdi->list);
+		free(pdi->info);
+		free(pdi);
+	}
+}
+
+extern int check_endbr64(unsigned long addr);
+
+/**
+ * unpatch_code - restore original instructions except the trap at the beginning
+ * @info - mcount disassembly info
+ * @return - -1 on error, 0 on success
+ */
+int unpatch_code(struct mcount_disasm_info *info)
+{
+	uint8_t *origin_code = info->insns;
+	uint8_t *origin_code_addr = (uint8_t *)info->addr;
+	int orig_size = info->orig_size;
+
+	/*
+	 * The second step is to restore the bytes after the trap instruction.
+	 *
+	 * 0x0: int3
+	 * 0x1: mov %rsp,%rbp
+	 * 0x4: lea 0xeb0(%rip),%rdi
+	 * 0xb: <other instructions>
+	 *
+	 * Before restoring the last byte, a serialization instruction must be
+	 * executed in order to synchronize the instruction cache of each processor.
+	 * The easiest method is to execute a membarrier system call with
+	 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. It will send an
+	 * inter-processor interrupt that will execute the required serialization.
+	 */
+
+	memcpy(&origin_code_addr[1], &origin_code[1], orig_size - 1);
+
+	return 0;
+}
+
+/**
+ * unpatch_normal_func_init - perform the initial steps of the unpatching
+ * process, awaiting for core synchronization. This step is batched with
+ * subsequent ones.
+ * @mdi - mcount dynamic info for the current module
+ * @sym - symbol to patch
+ * @return - instrumentation status
+ */
+static int unpatch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym)
+{
+	/*
+	 * Let's assume that we have the following instructions.
+	 *
+	 * 0x0: call <trampoline>
+	 * 0x5: <garbage instructions>
+	 * 0xb: <other instructions>
+	 *
+	 * The goal is to modify the instructions in order to get the following
+	 * instructions.
+	 *
+	 * 0x0: push %rbp
+	 * 0x1: mov %rsp,%rbp
+	 * 0x4: lea 0xeb0(%rip),%rdi
+	 * 0xb: <other instructions>
+	 */
+
+	unsigned long origin_code_addr;
+	unsigned call_offset = CALL_INSN_SIZE;
+	struct mcount_orig_insn *orig;
+	uint8_t *origin_code;
+	int orig_size;
+	struct mcount_disasm_info *info;
+
+	origin_code_addr = mdi->map->start + sym->addr;
+	if (check_endbr64(origin_code_addr))
+		origin_code_addr += ENDBR_INSN_SIZE;
+	orig = mcount_find_insn(origin_code_addr + call_offset);
+	if (!orig) {
+		pr_dbg("cannot find original instructions for '%s' at %#lx\n",
+		       sym->name, origin_code_addr + call_offset);
+		return -1;
+	}
+	origin_code = orig->orig;
+	orig_size = orig->orig_size;
+
+	pr_dbg2("unpatch normal func: %s (patch size: %d)\n", sym->name, orig_size);
+
+	if (mcount_target_running) {
+		info = xmalloc(sizeof(*info));
+		memset(info, 0, sizeof(*info));
+		info->sym = sym;
+		info->addr = mdi->map->start + sym->addr;
+		ASSERT(orig_size < 64); /* fit into info->insns */
+		memcpy(info->insns, origin_code, orig_size);
+		info->orig_size = orig_size;
+
+		/* The first step is to insert a trap.
+ * + * 0x0: int3 + * 0x1: + * 0x5: + * 0xb: + */ + + register_trap((void *)origin_code_addr, (void *)mdi->trampoline); + ((uint8_t *)origin_code_addr)[0] = 0xcc; + + commit_normal_func(&normal_funcs_unpatch, mdi, info); + } + else { + if (!memcpy((uint8_t *)origin_code_addr, origin_code, orig_size)) + return INSTRUMENT_FAILED; + } + + return 0; +} + +/** + * unpatch_region_unlock - remove the trap at the start of the unpatched region + * @info - mcount disassembly info + * @return - 0 + */ +int unpatch_region_unlock(struct mcount_disasm_info *info) +{ + uint8_t *origin_code = info->insns; + uint8_t *origin_code_addr = (uint8_t *)info->addr; + + /* + * The third step is to restore the last byte. + * + * 0x0: push %rbp + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + */ + + origin_code_addr[0] = origin_code[0]; + + return 0; +} + +/** + * mcount_unpatch_normal_func_fini - perform the final steps of the unpatching + * process and cleanup + */ +void mcount_unpatch_normal_func_fini() +{ + struct patch_dynamic_info *pdi, *tmp; + + if (!mcount_target_running) + return; + + if (list_empty(&normal_funcs_unpatch)) + return; + + synchronize_all_cores(); + + list_for_each_entry_safe(pdi, tmp, &normal_funcs_unpatch, list) { + unpatch_code(pdi->info); + write_memory_barrier(); + unpatch_region_unlock(pdi->info); + list_del(&pdi->list); + free(pdi->info); + free(pdi); + } } static int unpatch_func(uint8_t *insn, char *name) @@ -607,7 +1299,7 @@ int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sy break; case DYNAMIC_NONE: - result = patch_normal_func(mdi, sym, disasm); + result = patch_normal_func_init(mdi, sym, disasm); break; default: @@ -616,8 +1308,7 @@ int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sy return result; } -int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +int mcount_unpatch_func(struct mcount_dynamic_info *mdi, 
struct uftrace_symbol *sym) { int result = INSTRUMENT_SKIPPED; @@ -630,6 +1321,10 @@ int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol * result = unpatch_mcount_func(mdi, sym); break; + case DYNAMIC_NONE: + result = unpatch_normal_func_init(mdi, sym); + break; + default: break; } diff --git a/arch/x86_64/mcount-insn.c b/arch/x86_64/mcount-insn.c index 4097f753a..2725c0eb5 100644 --- a/arch/x86_64/mcount-insn.c +++ b/arch/x86_64/mcount-insn.c @@ -19,8 +19,17 @@ struct disasm_check_data { uint32_t size; }; +/** + * mcount_disasm_init - initialize the capstone engine once + * @disasm - mcount disassembly engine + */ void mcount_disasm_init(struct mcount_disasm_engine *disasm) { + if (disasm->engine) + return; + + pr_dbg2("initialize disassembly engine\n"); + if (cs_open(CS_ARCH_X86, CS_MODE_64, &disasm->engine) != CS_ERR_OK) { pr_dbg("failed to init capstone disasm engine\n"); return; @@ -562,15 +571,29 @@ static bool check_unsupported(struct mcount_disasm_engine *disasm, cs_insn *insn return true; } +/** + * check_endbr64 - check if instruction at @addr is endbr64 + * @addr - address to check for endbr64 + * @return - 1 if found, 0 if not + */ +int check_endbr64(unsigned long addr) +{ + uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; + + return !memcmp((void *)addr, endbr64, sizeof(endbr64)); +} + int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) { int status; cs_insn *insn = NULL; uint32_t count, i, size; - uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; + void *trampoline_addr; + uint8_t *operand; struct dynamic_bad_symbol *badsym; unsigned long addr = info->addr; + bool is_call, is_trampoline; badsym = mcount_find_badsym(mdi, info->addr); if (badsym != NULL) { @@ -588,9 +611,9 @@ int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynami return INSTRUMENT_SKIPPED; size = info->sym->size; - if (!memcmp((void *)info->addr, endbr64, 
sizeof(endbr64))) { - addr += sizeof(endbr64); - size -= sizeof(endbr64); + if (check_endbr64(info->addr)) { + addr += ENDBR_INSN_SIZE; + size -= ENDBR_INSN_SIZE; if (size <= CALL_INSN_SIZE) return INSTRUMENT_SKIPPED; @@ -602,6 +625,20 @@ int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynami if (count == 0) return INSTRUMENT_FAILED; + /* Check for pre-existing dynamic instrumentation + 1. check if opcode is call + 2. check operand is trampoline address */ + operand = &((uint8_t *)info->addr)[1]; + trampoline_addr = (void *)mdi->trampoline - (info->addr + CALL_INSN_SIZE); + is_call = ((uint8_t *)info->addr)[0] == 0xe8; + if (is_call) { + is_trampoline = !memcmp(operand, &trampoline_addr, CALL_INSN_SIZE - 1); + if (is_trampoline) { + pr_dbg2("skip dynamically patched func: %s\n", info->sym->name); + return INSTRUMENT_SKIPPED; + } + } + for (i = 0; i < count; i++) { uint8_t insns_byte[32] = { 0, @@ -653,7 +690,7 @@ TEST_CASE(dynamic_x86_handle_lea) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -724,7 +761,7 @@ TEST_CASE(dynamic_x86_handle_call) .addr = 0x4000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym1, .addr = ORIGINAL_BASE + sym1.addr, @@ -802,7 +839,7 @@ TEST_CASE(dynamic_x86_handle_jmp) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -906,7 +943,7 @@ TEST_CASE(dynamic_x86_handle_jcc) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -1015,7 +1052,7 @@ 
TEST_CASE(dynamic_x86_handle_mov_load) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, diff --git a/check-deps/Makefile b/check-deps/Makefile index 2ca41b3ee..8f06dfae4 100644 --- a/check-deps/Makefile +++ b/check-deps/Makefile @@ -14,6 +14,7 @@ CHECK_LIST += have_libdw CHECK_LIST += have_libunwind CHECK_LIST += have_libcapstone CHECK_LIST += cc_has_minline_all_stringops +CHECK_LIST += have_membarrier # # This is needed for checking build dependency diff --git a/check-deps/Makefile.check b/check-deps/Makefile.check index 93443e145..85042658d 100644 --- a/check-deps/Makefile.check +++ b/check-deps/Makefile.check @@ -82,3 +82,7 @@ endif ifneq ($(wildcard $(objdir)/check-deps/cc_has_minline_all_stringops),) LIB_CFLAGS += -minline-all-stringops endif + +ifneq ($(wildcard $(objdir)/check-deps/have_membarrier),) + LIB_CFLAGS += -DHAVE_MEMBARRIER +endif diff --git a/check-deps/__have_membarrier.c b/check-deps/__have_membarrier.c new file mode 100644 index 000000000..30a7c5a92 --- /dev/null +++ b/check-deps/__have_membarrier.c @@ -0,0 +1,10 @@ +#include +#include +#include + +int main() +{ + syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + return 0; +} diff --git a/cmds/live.c b/cmds/live.c index b641f7e2d..1b06550f9 100644 --- a/cmds/live.c +++ b/cmds/live.c @@ -340,9 +340,19 @@ static int forward_options(struct uftrace_opts *opts) if (capabilities < 0) goto close; - /* FIXME Forward user options and set status */ - if (0) - status = forward_option(sfd, capabilities, 0, NULL, 0); + if (opts->patch) { /* provide a pattern type for options that need it */ + status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATTERN, + &opts->patt_type, sizeof(opts->patt_type)); + if (status < 0) + goto close; + } + + if 
(opts->patch) { + status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATCH, opts->patch, + strlen(opts->patch) + 1); + if (status < 0) + goto close; + } close: status_close = agent_message_send(sfd, UFTRACE_MSG_AGENT_CLOSE, NULL, 0); diff --git a/configure b/configure index c90aab2ad..ead8571a8 100755 --- a/configure +++ b/configure @@ -35,6 +35,7 @@ usage() { --without-capstone build without libcapstone (even if found on the system) --without-perf build without perf event (even if available) --without-schedule build without scheduler event (even if available) + --without-membarrier build without membarriers (even if available) --arch= set target architecture (default: system default arch) e.g. x86_64, aarch64, i386, or arm @@ -198,6 +199,7 @@ for dep in $IGNORE; do capstone) TARGET=have_libcapstone ;; perf*) TARGET=perf_clockid ;; sched*) TARGET=perf_context_switch;; + membarrier) TARGET=have_membarrier ;; *) ;; esac if [ ! -z "$TARGET" ]; then @@ -265,6 +267,7 @@ print_feature "perf_event" "perf_clockid" "perf (PMU) event support" print_feature "schedule" "perf_context_switch" "scheduler event support" print_feature "capstone" "have_libcapstone" "full dynamic tracing support" print_feature "libunwind" "have_libunwind" "stacktrace support (optional for debugging)" +print_feature "membarrier" "have_membarrier" "membarrier support (requires Linux 4.16.0)" cat >$output < modified instructions ptr) */ static struct Hashmap *code_hmap; /* minimum function size for dynamic update */ @@ -60,6 +56,8 @@ static unsigned min_size; /* disassembly engine for dynamic code patch (for capstone) */ static struct mcount_disasm_engine disasm; +static bool mcount_dynamic_is_initialized; + static struct mcount_orig_insn *create_code(struct Hashmap *map, unsigned long addr) { struct mcount_orig_insn *entry; @@ -224,8 +222,15 @@ __weak int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_sym return -1; } -__weak int mcount_unpatch_func(struct 
mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +__weak void mcount_patch_normal_func_fini() +{ +} + +__weak void mcount_unpatch_normal_func_fini() +{ +} + +__weak int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) { return -1; } @@ -247,6 +252,11 @@ __weak void mcount_disasm_finish(struct mcount_disasm_engine *disasm) { } +__weak int mcount_arch_dynamic_init(void) +{ + return -1; +} + __weak int mcount_arch_branch_table_size(struct mcount_disasm_info *info) { return 0; @@ -258,6 +268,7 @@ __weak void mcount_arch_patch_branch(struct mcount_disasm_info *info, struct mco struct find_module_data { struct uftrace_sym_info *sinfo; + bool skip_first; /* fist module is the main binary */ bool needs_modules; }; @@ -293,8 +304,15 @@ static struct mcount_dynamic_info *create_mdi(struct dl_phdr_info *info) return mdi; } -/* callback for dl_iterate_phdr() */ -static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) +/** + * prepare_dynamic_module - store dynamic module info and install trampoline; + * callback for dl_iterate_phdr() + * @info - module info + * @sz - data size (unused) + * @data - callback data: symbol info and module parsing flag + * @return - stop iteration on non-zero value + */ +static int prepare_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) { struct mcount_dynamic_info *mdi; struct find_module_data *fmd = data; @@ -302,6 +320,11 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) struct uftrace_mmap *map; bool is_executable = mcount_is_main_executable(info->dlpi_name, sym_info->filename); + if (fmd->skip_first) { /* skip main binary */ + fmd->skip_first = false; + return !fmd->needs_modules; + } + mdi = create_mdi(info); map = find_map(sym_info, mdi->base_addr); @@ -309,8 +332,12 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) mdi->map = map; 
mcount_arch_find_module(mdi, &map->mod->symtab); + if (mcount_setup_trampoline(mdi) < 0) + mdi->trampoline = 0; + mdi->next = mdinfo; mdinfo = mdi; + pr_dbg3("load dynamic info for '%s'\n", mdi->map->libname); } else { free(mdi); @@ -319,6 +346,12 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) return !fmd->needs_modules && is_executable; } +/** + * prepare_dynamic_update - create dynamic data structures and load dynamic + * modules info + * @sinfo - dynamic symbol info + * @needs_modules - whether to prepare dynamic modules or only the main binary + */ static void prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_modules) { struct find_module_data fmd = { @@ -327,12 +360,34 @@ static void prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_mo }; int hash_size = sinfo->exec_map->mod->symtab.nr_sym * 3 / 4; - if (needs_modules) - hash_size *= 2; + if (code_hmap && !needs_modules) /* already prepared */ + return; + if (!code_hmap) { /* nothing loaded */ + if (needs_modules) + hash_size *= 2; + code_hmap = hashmap_create(hash_size, hashmap_ptr_hash, hashmap_ptr_equals); + } + else if (mdinfo && !mdinfo->next) /* main binary loaded, no module loaded */ + fmd.skip_first = true; + else + return; - code_hmap = hashmap_create(hash_size, hashmap_ptr_hash, hashmap_ptr_equals); + dl_iterate_phdr(prepare_dynamic_module, &fmd); +} - dl_iterate_phdr(find_dynamic_module, &fmd); +/** + * setup_size_filter - initialize size filter if not set + */ +void setup_size_filter() +{ + char *size_filter; + + if (min_size) + return; + + size_filter = getenv("UFTRACE_MIN_SIZE"); + if (size_filter) + min_size = strtoul(size_filter, NULL, 0); } struct mcount_dynamic_info *setup_trampoline(struct uftrace_mmap *map) @@ -384,10 +439,17 @@ static bool match_pattern_module(char *pathname) return ret; } -static bool match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym_name) +/** + * match_pattern_list - match a symbol 
name against a pattern list + * @map - memory map of the symbol + * @soname - name of the module + * @sym_name - name of the symbol + * @return - -1 if match negative, 1 if match positive, 0 if no match + */ +static int match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym_name) { struct patt_list *pl; - bool ret = false; + int ret = 0; char *libname = basename(map->libname); list_for_each_entry(pl, &patterns, list) { @@ -398,7 +460,7 @@ static bool match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym continue; if (match_filter_pattern(&pl->patt, sym_name)) - ret = pl->positive; + ret = pl->positive ? 1 : -1; } return ret; @@ -410,7 +472,6 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa char *name; int j; struct patt_list *pl; - bool all_negative = true; strv_split(&funcs, patch_funcs, ";"); @@ -421,10 +482,8 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa if (name[0] == '!') name++; - else { + else pl->positive = true; - all_negative = false; - } delim = strchr(name, '@'); if (delim == NULL) { @@ -439,20 +498,6 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa list_add_tail(&pl->list, &patterns); } - /* prepend match-all pattern, if all patterns are negative */ - if (all_negative) { - pl = xzalloc(sizeof(*pl)); - pl->positive = true; - pl->module = xstrdup(def_mod); - - if (ptype == PATT_REGEX) - init_filter_pattern(ptype, &pl->patt, "."); - else - init_filter_pattern(PATT_GLOB, &pl->patt, "*"); - - list_add(&pl->list, &patterns); - } - strv_free(&funcs); } @@ -487,12 +532,6 @@ static bool skip_sym(struct uftrace_symbol *sym, struct mcount_dynamic_info *mdi if (sym->type != ST_LOCAL_FUNC && sym->type != ST_GLOBAL_FUNC && sym->type != ST_WEAK_FUNC) return true; - if (!match_pattern_list(map, soname, sym->name)) { - if (mcount_unpatch_func(mdi, sym, &disasm) == 0) - stats.unpatch++; - return true; - } - return false; } @@ -501,16 
+540,31 @@ static void mcount_patch_func_with_stats(struct mcount_dynamic_info *mdi, { switch (mcount_patch_func(mdi, sym, &disasm, min_size)) { case INSTRUMENT_FAILED: - stats.failed++; + patch_stats.failed++; + break; + case INSTRUMENT_SKIPPED: + patch_stats.skipped++; + break; + case INSTRUMENT_SUCCESS: + patch_stats.success++; + break; + } +} + +static void mcount_unpatch_func_with_stats(struct mcount_dynamic_info *mdi, + struct uftrace_symbol *sym) +{ + switch (mcount_unpatch_func(mdi, sym)) { + case INSTRUMENT_FAILED: + unpatch_stats.failed++; break; case INSTRUMENT_SKIPPED: - stats.skipped++; + unpatch_stats.skipped++; break; case INSTRUMENT_SUCCESS: - default: + unpatch_stats.success++; break; } - stats.total++; } static void patch_patchable_func_matched(struct mcount_dynamic_info *mdi, struct uftrace_mmap *map) @@ -559,6 +613,7 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf unsigned i; struct uftrace_symbol *sym; bool found = false; + int match; char *soname = get_soname(map->libname); symtab = &map->mod->symtab; @@ -568,13 +623,22 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf if (skip_sym(sym, mdi, map, soname)) continue; - found = true; - mcount_patch_func_with_stats(mdi, sym); + + match = match_pattern_list(map, soname, sym->name); + if (!match) + continue; + else if (match == 1) + mcount_patch_func_with_stats(mdi, sym); + else + mcount_unpatch_func_with_stats(mdi, sym); } if (!found) - stats.nomatch++; + stats_nomatch++; + + mcount_patch_normal_func_fini(); + mcount_unpatch_normal_func_fini(); free(soname); } @@ -594,10 +658,18 @@ static void patch_func_matched(struct mcount_dynamic_info *mdi, struct uftrace_m patch_normal_func_matched(mdi, map); } +/** + * do_dynamic_update - apply (un)patching across loaded modules' maps + * @sinfo - dynamic symbol info + * @patch_funcs - spec of symbols to (un)patch + * @ptype - matching pattern type + * @return - 0 (unused) + */ static int 
do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, enum uftrace_pattern_type ptype) { struct uftrace_mmap *map; + struct mcount_dynamic_info *mdi; char *def_mod; if (patch_funcs == NULL) @@ -606,41 +678,38 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, def_mod = basename(sinfo->exec_map->libname); parse_pattern_list(patch_funcs, def_mod, ptype); - for_each_map(sinfo, map) { - struct mcount_dynamic_info *mdi; + memset(&patch_stats, 0, sizeof(patch_stats)); + memset(&unpatch_stats, 0, sizeof(unpatch_stats)); - /* TODO: filter out unsuppported libs */ - mdi = setup_trampoline(map); - if (mdi == NULL) - continue; + /* TODO: filter out unsupported libs */ + for (mdi = mdinfo; mdi != NULL; mdi = mdi->next) { + if (mdi->type == DYNAMIC_NONE && !mcount_dynamic_is_initialized) { + if (mcount_arch_dynamic_init() >= 0) + mcount_dynamic_is_initialized = true; + else + continue; + } - patch_func_matched(mdi, map); + map = mdi->map; + if (mdi->trampoline) + patch_func_matched(mdi, map); } - if (stats.failed + stats.skipped + stats.nomatch == 0) { - pr_dbg("patched all (%d) functions in '%s'\n", stats.total, + release_pattern_list(); + + if (patch_stats.success && patch_stats.failed + patch_stats.skipped == 0) { + int patch_total = patch_stats.success + patch_stats.failed + patch_stats.skipped; + pr_dbg("patched all (%d) functions in '%s'\n", patch_total, basename(sinfo->filename)); } - - return 0; -} - -static void freeze_dynamic_update(void) -{ - struct mcount_dynamic_info *mdi, *tmp; - - mdi = mdinfo; - while (mdi) { - tmp = mdi->next; - - mcount_arch_dynamic_recover(mdi, &disasm); - mcount_cleanup_trampoline(mdi); - free(mdi); - - mdi = tmp; + if (unpatch_stats.success && unpatch_stats.failed + unpatch_stats.skipped == 0) { + int unpatch_total = + unpatch_stats.success + unpatch_stats.failed + unpatch_stats.skipped; + pr_dbg("unpatched all (%d) functions in '%s'\n", unpatch_total, + basename(sinfo->filename)); } - 
mcount_freeze_code(); + return 0; } /* do not use floating-point in libmcount */ @@ -652,39 +721,58 @@ static int calc_percent(int n, int total, int *rem) return quot; } +/** + * mcount_dynamic_update - prepare and perform dynamic patching or unpatching, + * then display statistics + * @sinfo - dynamic symbol info + * @patch_funcs - spec of symbols to patch or unpatch + * @ptype - matching pattern type + * @return - 0 (unused) + */ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, enum uftrace_pattern_type ptype) { int ret = 0; - char *size_filter; bool needs_modules = !!strchr(patch_funcs, '@'); mcount_disasm_init(&disasm); - prepare_dynamic_update(sinfo, needs_modules); - - size_filter = getenv("UFTRACE_MIN_SIZE"); - if (size_filter != NULL) - min_size = strtoul(size_filter, NULL, 0); + setup_size_filter(); ret = do_dynamic_update(sinfo, patch_funcs, ptype); - if (stats.total && (stats.failed || stats.skipped)) { - int success = stats.total - stats.failed - stats.skipped; + if (patch_stats.failed || patch_stats.skipped) { + int total = patch_stats.success + patch_stats.failed + patch_stats.skipped; int r, q; pr_dbg("dynamic patch stats for '%s'\n", basename(sinfo->filename)); - pr_dbg(" total: %8d\n", stats.total); - q = calc_percent(success, stats.total, &r); - pr_dbg(" patched: %8d (%2d.%02d%%)\n", success, q, r); - q = calc_percent(stats.failed, stats.total, &r); - pr_dbg(" failed: %8d (%2d.%02d%%)\n", stats.failed, q, r); - q = calc_percent(stats.skipped, stats.total, &r); - pr_dbg(" skipped: %8d (%2d.%02d%%)\n", stats.skipped, q, r); - pr_dbg("no match: %8d\n", stats.nomatch); + pr_dbg(" total: %8d\n", total); + q = calc_percent(patch_stats.success, total, &r); + pr_dbg(" patched: %8d (%2d.%02d%%)\n", patch_stats.success, q, r); + q = calc_percent(patch_stats.failed, total, &r); + pr_dbg(" failed: %8d (%2d.%02d%%)\n", patch_stats.failed, q, r); + q = calc_percent(patch_stats.skipped, total, &r); + pr_dbg(" skipped: %8d 
(%2d.%02d%%)\n", patch_stats.skipped, q, r); } - freeze_dynamic_update(); + if (unpatch_stats.failed || unpatch_stats.skipped) { + int total = unpatch_stats.success + unpatch_stats.failed + unpatch_stats.skipped; + int r, q; + + pr_dbg("dynamic unpatch stats for '%s'\n", basename(sinfo->filename)); + pr_dbg(" total: %8d\n", total); + q = calc_percent(unpatch_stats.success, total, &r); + pr_dbg(" unpatched: %8d (%2d.%02d%%)\n", unpatch_stats.success, q, r); + q = calc_percent(unpatch_stats.failed, total, &r); + pr_dbg(" failed: %8d (%2d.%02d%%)\n", unpatch_stats.failed, q, r); + q = calc_percent(unpatch_stats.skipped, total, &r); + pr_dbg(" skipped: %8d (%2d.%02d%%)\n", unpatch_stats.skipped, q, r); + } + + if (stats_nomatch) + pr_dbg("no match: %8d\n", stats_nomatch); + + mcount_freeze_code(); return ret; } @@ -730,9 +818,29 @@ void mcount_dynamic_dlopen(struct uftrace_sym_info *sinfo, struct dl_phdr_info * mcount_freeze_code(); } +/** + * mcount_free_mdinfo - free all dynamic info structures + */ +static void mcount_free_mdinfo(void) +{ + struct mcount_dynamic_info *mdi, *tmp; + + mdi = mdinfo; + while (mdi) { + tmp = mdi->next; + + mcount_arch_dynamic_recover(mdi, &disasm); + mcount_cleanup_trampoline(mdi); + free(mdi); + + mdi = tmp; + } +} + void mcount_dynamic_finish(void) { release_pattern_list(); + mcount_free_mdinfo(); mcount_disasm_finish(&disasm); } @@ -846,27 +954,27 @@ TEST_CASE(dynamic_pattern_list) pr_dbg("check simple match with default module\n"); parse_pattern_list("abc;!def", "main", PATT_SIMPLE); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), true); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), false); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), false); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), 1); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), -1); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 0); release_pattern_list(); pr_dbg("check negative regex match with default module\n"); 
parse_pattern_list("!^a", "main", PATT_REGEX); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), false); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), true); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), false); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), -1); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), 0); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 0); release_pattern_list(); pr_dbg("check wildcard match with other module\n"); parse_pattern_list("*@other", "main", PATT_GLOB); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), false); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), false); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), true); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), 0); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), 0); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 1); release_pattern_list(); diff --git a/libmcount/dynamic.h b/libmcount/dynamic.h index c687e5e37..21055fc43 100644 --- a/libmcount/dynamic.h +++ b/libmcount/dynamic.h @@ -68,6 +68,12 @@ struct mcount_disasm_engine { #define INSTRUMENT_FAILED -1 #define INSTRUMENT_SKIPPED -2 +struct mcount_dynamic_stats { + int success; + int failed; + int skipped; +}; + /* * Supposing the size of smallest conditional branch is 2 byte. 
* We can replace, at most, 3 of them by the instrumentation diff --git a/libmcount/mcount.c b/libmcount/mcount.c index 4717a8208..c185592a5 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -52,6 +52,9 @@ bool mcount_auto_recover = ARCH_SUPPORT_AUTO_RECOVER; /* global flag to control mcount behavior */ unsigned long mcount_global_flags = MCOUNT_GFL_SETUP; +/* global to indicate if the target is executing */ +bool mcount_target_running = false; + /* TSD key to save mtd below */ pthread_key_t mtd_key = (pthread_key_t)-1; @@ -76,6 +79,13 @@ int page_size_in_kb; /* call depth to filter */ static int __maybe_unused mcount_depth = MCOUNT_DEFAULT_DEPTH; +/* setting for all filter actions */ +static struct uftrace_filter_setting mcount_filter_setting = { + .ptype = PATT_REGEX, + .auto_args = false, + .allow_kernel = false, +}; + /* boolean flag to turn on/off recording */ static bool __maybe_unused mcount_enabled = true; @@ -106,7 +116,7 @@ static pthread_t agent; /* state flag for the agent */ static volatile bool agent_run = false; -#define MCOUNT_AGENT_CAPABILITIES 0 +#define MCOUNT_AGENT_CAPABILITIES (UFTRACE_AGENT_OPT_PATTERN | UFTRACE_AGENT_OPT_PATCH) __weak void dynamic_return(void) { @@ -114,7 +124,7 @@ __weak void dynamic_return(void) #ifdef DISABLE_MCOUNT_FILTER -static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) +static void mcount_filter_init(struct uftrace_filter_setting *filter_setting, bool force) { if (getenv("UFTRACE_SRCLINE") == NULL) return; @@ -122,7 +132,7 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) load_module_symtabs(&mcount_sym_info); /* use debug info if available */ - prepare_debug_info(&mcount_sym_info, ptype, NULL, NULL, false, force); + prepare_debug_info(&mcount_sym_info, filter_setting->ptype, NULL, NULL, false, force); save_debug_info(&mcount_sym_info, mcount_sym_info.dirname); } @@ -366,7 +376,7 @@ static void mcount_signal_finish(void) } } -static void 
mcount_filter_init(enum uftrace_pattern_type ptype, bool force) +static void mcount_filter_init(struct uftrace_filter_setting *filter_setting, bool force) { char *filter_str = getenv("UFTRACE_FILTER"); char *trigger_str = getenv("UFTRACE_TRIGGER"); @@ -376,23 +386,19 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) char *caller_str = getenv("UFTRACE_CALLER"); char *loc_str = getenv("UFTRACE_LOCATION"); - struct uftrace_filter_setting filter_setting = { - .ptype = ptype, - .auto_args = false, - .allow_kernel = false, - .lp64 = host_is_lp64(), - .arch = host_cpu_arch(), - }; + filter_setting->lp64 = host_is_lp64(); + filter_setting->arch = host_cpu_arch(); + bool needs_debug_info = false; load_module_symtabs(&mcount_sym_info); - mcount_signal_init(getenv("UFTRACE_SIGNAL"), &filter_setting); + mcount_signal_init(getenv("UFTRACE_SIGNAL"), filter_setting); /* setup auto-args only if argument/return value is used */ if (argument_str || retval_str || autoargs_str || (trigger_str && (strstr(trigger_str, "arg") || strstr(trigger_str, "retval")))) { - setup_auto_args(&filter_setting); + setup_auto_args(filter_setting); needs_debug_info = true; } @@ -401,28 +407,28 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) /* use debug info if available */ if (needs_debug_info) { - prepare_debug_info(&mcount_sym_info, ptype, argument_str, retval_str, - !!autoargs_str, force); + prepare_debug_info(&mcount_sym_info, filter_setting->ptype, argument_str, + retval_str, !!autoargs_str, force); save_debug_info(&mcount_sym_info, mcount_sym_info.dirname); } mcount_triggers = xmalloc(sizeof(*mcount_triggers)); *mcount_triggers = RB_ROOT; uftrace_setup_filter(filter_str, &mcount_sym_info, mcount_triggers, &mcount_filter_mode, - &filter_setting); + filter_setting); uftrace_setup_trigger(trigger_str, &mcount_sym_info, mcount_triggers, &mcount_filter_mode, - &filter_setting); - uftrace_setup_argument(argument_str, &mcount_sym_info, 
mcount_triggers, &filter_setting); - uftrace_setup_retval(retval_str, &mcount_sym_info, mcount_triggers, &filter_setting); + filter_setting); + uftrace_setup_argument(argument_str, &mcount_sym_info, mcount_triggers, filter_setting); + uftrace_setup_retval(retval_str, &mcount_sym_info, mcount_triggers, filter_setting); if (needs_debug_info) { uftrace_setup_loc_filter(loc_str, &mcount_sym_info, mcount_triggers, - &mcount_loc_mode, &filter_setting); + &mcount_loc_mode, filter_setting); } if (caller_str) { uftrace_setup_caller_filter(caller_str, &mcount_sym_info, mcount_triggers, - &filter_setting); + filter_setting); } /* there might be caller triggers, count it separately */ @@ -433,13 +439,13 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) char *autoarg = "."; char *autoret = "."; - if (ptype == PATT_GLOB) + if (filter_setting->ptype == PATT_GLOB) autoarg = autoret = "*"; - filter_setting.auto_args = true; + filter_setting->auto_args = true; - uftrace_setup_argument(autoarg, &mcount_sym_info, mcount_triggers, &filter_setting); - uftrace_setup_retval(autoret, &mcount_sym_info, mcount_triggers, &filter_setting); + uftrace_setup_argument(autoarg, &mcount_sym_info, mcount_triggers, filter_setting); + uftrace_setup_retval(autoret, &mcount_sym_info, mcount_triggers, filter_setting); } if (getenv("UFTRACE_DEPTH")) @@ -1864,9 +1870,23 @@ static int agent_read_option(int fd, int *opt, void **value, size_t read_size) */ static int agent_apply_option(int opt, void *value, size_t size, struct rb_root *triggers) { + enum uftrace_pattern_type patt_type; int ret = 0; switch (opt) { + case UFTRACE_AGENT_OPT_PATTERN: + patt_type = *((int *)value); + if (patt_type != mcount_filter_setting.ptype) { + mcount_filter_setting.ptype = patt_type; + pr_dbg4("use pattern type %#x\n", patt_type); + } + break; + + case UFTRACE_AGENT_OPT_PATCH: + pr_dbg4("apply patch '%s' (size=%d)\n", value, size); + mcount_dynamic_update(&mcount_sym_info, value, 
mcount_filter_setting.ptype); + break; + default: ret = -1; } @@ -2060,7 +2080,6 @@ static __used void mcount_startup(void) char *symdir_str; struct stat statbuf; bool nest_libcall; - enum uftrace_pattern_type patt_type = PATT_REGEX; if (!(mcount_global_flags & MCOUNT_GFL_SETUP)) return; @@ -2161,14 +2180,14 @@ static __used void mcount_startup(void) record_proc_maps(dirname, mcount_session_name(), &mcount_sym_info); if (pattern_str) - patt_type = parse_filter_pattern(pattern_str); + mcount_filter_setting.ptype = parse_filter_pattern(pattern_str); if (patch_str) mcount_return_fn = (unsigned long)dynamic_return; else mcount_return_fn = (unsigned long)mcount_return; - mcount_filter_init(patt_type, !!patch_str); + mcount_filter_init(&mcount_filter_setting, !!patch_str); mcount_watch_init(); if (maxstack_str) @@ -2181,10 +2200,10 @@ static __used void mcount_startup(void) mcount_min_size = strtoul(minsize_str, NULL, 0); if (patch_str) - mcount_dynamic_update(&mcount_sym_info, patch_str, patt_type); + mcount_dynamic_update(&mcount_sym_info, patch_str, mcount_filter_setting.ptype); if (event_str) - mcount_setup_events(dirname, event_str, patt_type); + mcount_setup_events(dirname, event_str, mcount_filter_setting.ptype); if (getenv("UFTRACE_KERNEL_PID_UPDATE")) kernel_pid_update = true; @@ -2209,13 +2228,14 @@ static __used void mcount_startup(void) /* initialize script binding */ if (SCRIPT_ENABLED && script_str) - mcount_script_init(patt_type); + mcount_script_init(mcount_filter_setting.ptype); compiler_barrier(); pr_dbg("mcount setup done\n"); mcount_global_flags &= ~MCOUNT_GFL_SETUP; mtd.recursion_marker = false; + mcount_target_running = true; } static void mcount_cleanup(void) diff --git a/uftrace.h b/uftrace.h index 2ee74fe01..c1fbcc67d 100644 --- a/uftrace.h +++ b/uftrace.h @@ -458,7 +458,8 @@ struct uftrace_msg_dlopen { }; enum uftrace_agent_opt { - UFTRACE_AGENT_OPT_XXX, + UFTRACE_AGENT_OPT_PATTERN = (1U << 0), /* pattern match type */ + UFTRACE_AGENT_OPT_PATCH 
= (1U << 1), /* patch string */ }; extern struct uftrace_session *first_session; diff --git a/utils/utils.c b/utils/utils.c index 592b3df40..e85de181f 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef HAVE_LIBUNWIND @@ -1001,6 +1002,121 @@ int copy_file(const char *path_in, const char *path_out) return 0; } +#ifndef gettid +/** + * gettid - gettid syscall wrapper (glibc < 2.30) + * @return - thread id + */ +pid_t gettid() +{ + return syscall(__NR_gettid); +} +#endif + +#ifndef tgkill +/** + * tgkill - tgkill syscall wrapper (glibc < 2.30) + * @tgid - thread group id + * @tid - thread id + * @signal - signal to send + * @return - 0 on success, -1 on error + */ +int tgkill(pid_t tgid, pid_t tid, int signal) +{ + return syscall(SYS_tgkill, tgid, tid, signal); +} +#endif + +/** + * find_unused_sigrt - find a real-time signal with no associated action + * @return - unused RT signal, or -1 if none found + */ +int find_unused_sigrt() +{ + int sig; + struct sigaction oldact; + + for (sig = SIGRTMIN; sig <= SIGRTMAX; sig++) { + if (sigaction(sig, NULL, &oldact) < 0) { + pr_dbg3("failed to check RT signal handler\n"); + continue; + } + + if (!oldact.sa_handler) + return sig; + } + + pr_dbg2("failed to find unused SIGRT\n"); + return -1; +} + +/** + * thread_broadcast_signal - send a signal to all the other running threads in + * the process + * @sig - signal to send + * @return - number of signals sent, -1 on error + */ +int thread_broadcast_signal(int sig) +{ + char path[32]; + DIR *dir; + struct dirent *dirent; + pid_t pid, tid, task; + int signal_count = 0; + + pid = getpid(); + tid = gettid(); + + snprintf(path, 32, "/proc/%u/task", pid); + dir = opendir(path); + if (!dir) { + pr_dbg("failed to open directory '%s'\n", path); + goto fail_open_dir; + } + + errno = 0; + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + /* skip "." and ".." 
directories */ + if (dirent->d_name[0] == '.') + continue; + + task = strtol(dirent->d_name, NULL, 10); + if (errno != 0 || task < 0) { + pr_dbg("failed to parse TID '%s'\n", dirent->d_name); + continue; + } + + /* ignore our TID */ + if (task == tid) + continue; + + /* + * By reading /proc//task directory, there is the possibility of + * a race condition where a thread exits before we send the signal. + */ + pr_dbg4("send SIGRT%d to %u\n", sig, task); + if (tgkill(pid, task, sig) < 0) { + if (errno != ESRCH) { + pr_dbg2("cannot signal thread %u: %s\n", task, strerror(errno)); + errno = 0; + } + } + else + signal_count++; + } + + if (errno != 0) + pr_dbg("failed to read directory entry\n"); + + if (closedir(dir) < 0) + pr_dbg2("failed to close directory\n"); + + return signal_count; + +fail_open_dir: + return -1; +} + #ifdef UNIT_TEST TEST_CASE(utils_parse_cmdline) { diff --git a/utils/utils.h b/utils/utils.h index fb0d8c4ed..3d11a9f5f 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -429,4 +429,10 @@ void stacktrace(void); int copy_file(const char *path_in, const char *path_out); +pid_t gettid(); +int tgkill(pid_t tgid, pid_t tid, int signal); + +int find_unused_sigrt(); +int thread_broadcast_signal(int sig); + #endif /* UFTRACE_UTILS_H */