From b0557dd76a5bc578d054e7ec8570d936af9d985b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 10:15:53 -0400 Subject: [PATCH 01/30] dynamic: Load dynamic info only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation of runtime dynamic patching. This commit guarantees that dynamic info is read only once for the target binary and for each module. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index c9c058eb5..1272fe073 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -258,6 +258,7 @@ __weak void mcount_arch_patch_branch(struct mcount_disasm_info *info, struct mco struct find_module_data { struct uftrace_sym_info *sinfo; + bool skip_first; /* fist module is the main binary */ bool needs_modules; }; @@ -302,6 +303,11 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) struct uftrace_mmap *map; bool is_executable = mcount_is_main_executable(info->dlpi_name, sym_info->filename); + if (fmd->skip_first) { /* skip main binary */ + fmd->skip_first = false; + return !fmd->needs_modules; + } + mdi = create_mdi(info); map = find_map(sym_info, mdi->base_addr); @@ -311,6 +317,7 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) mdi->next = mdinfo; mdinfo = mdi; + pr_dbg3("load dynamic info for '%s'\n", mdi->map->libname); } else { free(mdi); @@ -319,6 +326,12 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) return !fmd->needs_modules && is_executable; } +/** + * prepare_dynamic_update - create dynamic data structures and load dynamic + * modules info + * @sinfo - dynamic symbol info + * @needs_modules - whether to prepare dynamic modules or only the main binary + */ static void 
prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_modules) { struct find_module_data fmd = { @@ -327,10 +340,17 @@ static void prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_mo }; int hash_size = sinfo->exec_map->mod->symtab.nr_sym * 3 / 4; - if (needs_modules) - hash_size *= 2; - - code_hmap = hashmap_create(hash_size, hashmap_ptr_hash, hashmap_ptr_equals); + if (code_hmap && !needs_modules) /* already prepared */ + return; + if (!code_hmap) { /* nothing loaded */ + if (needs_modules) + hash_size *= 2; + code_hmap = hashmap_create(hash_size, hashmap_ptr_hash, hashmap_ptr_equals); + } + else if (mdinfo && !mdinfo->next) /* main binary loaded, no module loaded */ + fmd.skip_first = true; + else + return; dl_iterate_phdr(find_dynamic_module, &fmd); } From e65ea2f6f0cb76e7f43811bd62e0a6933431d427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 10:18:31 -0400 Subject: [PATCH 02/30] dynamic: Initialize size filter only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If 'mcount_dynamic_update' is called multiple times (e.g. at runtime), it initializes the size filter only once. 
Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index 1272fe073..abf3c1a7f 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -355,6 +355,21 @@ static void prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_mo dl_iterate_phdr(find_dynamic_module, &fmd); } +/** + * setup_size_filter - initialize size filter if not set + */ +void setup_size_filter() +{ + char *size_filter; + + if (min_size) + return; + + size_filter = getenv("UFTRACE_MIN_SIZE"); + if (size_filter) + min_size = strtoul(size_filter, NULL, 0); +} + struct mcount_dynamic_info *setup_trampoline(struct uftrace_mmap *map) { struct mcount_dynamic_info *mdi; @@ -676,16 +691,13 @@ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, enum uftrace_pattern_type ptype) { int ret = 0; - char *size_filter; bool needs_modules = !!strchr(patch_funcs, '@'); mcount_disasm_init(&disasm); prepare_dynamic_update(sinfo, needs_modules); - size_filter = getenv("UFTRACE_MIN_SIZE"); - if (size_filter != NULL) - min_size = strtoul(size_filter, NULL, 0); + setup_size_filter(); ret = do_dynamic_update(sinfo, patch_funcs, ptype); From b81a6fa46ab63664c1f4f7780ec6c01031d03a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 10:20:35 -0400 Subject: [PATCH 03/30] dynamic: arch: Initialize disassembly engine once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip the initialization of the disassembly engine with it has already been performed. 
Signed-off-by: Clément Guidi --- arch/aarch64/mcount-insn.c | 11 ++++++++++- arch/x86_64/mcount-insn.c | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/aarch64/mcount-insn.c b/arch/aarch64/mcount-insn.c index 13488487b..7e0a2dc8a 100644 --- a/arch/aarch64/mcount-insn.c +++ b/arch/aarch64/mcount-insn.c @@ -8,10 +8,19 @@ #include #include +/** + * mcount_disasm_init - initialize the capstone engine once + * @disasm - mcount disassembly engine + */ void mcount_disasm_init(struct mcount_disasm_engine *disasm) { + if (disasm->engine) + return; + + pr_dbg2("initialize disassembly engine\n"); + if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &disasm->engine) != CS_ERR_OK) { - pr_dbg("failed to init Capstone disasm engine\n"); + pr_dbg("failed to init capstone disasm engine\n"); return; } diff --git a/arch/x86_64/mcount-insn.c b/arch/x86_64/mcount-insn.c index 4097f753a..740216d99 100644 --- a/arch/x86_64/mcount-insn.c +++ b/arch/x86_64/mcount-insn.c @@ -19,8 +19,17 @@ struct disasm_check_data { uint32_t size; }; +/** + * mcount_disasm_init - initialize the capstone engine once + * @disasm - mcount disassembly engine + */ void mcount_disasm_init(struct mcount_disasm_engine *disasm) { + if (disasm->engine) + return; + + pr_dbg2("initialize disassembly engine\n"); + if (cs_open(CS_ARCH_X86, CS_MODE_64, &disasm->engine) != CS_ERR_OK) { pr_dbg("failed to init capstone disasm engine\n"); return; @@ -653,7 +662,7 @@ TEST_CASE(dynamic_x86_handle_lea) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -724,7 +733,7 @@ TEST_CASE(dynamic_x86_handle_call) .addr = 0x4000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym1, .addr = ORIGINAL_BASE + sym1.addr, @@ -802,7 +811,7 @@ 
TEST_CASE(dynamic_x86_handle_jmp) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -906,7 +915,7 @@ TEST_CASE(dynamic_x86_handle_jcc) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, @@ -1015,7 +1024,7 @@ TEST_CASE(dynamic_x86_handle_mov_load) .addr = 0x3000, .size = 32, }; - struct mcount_disasm_engine disasm; + struct mcount_disasm_engine disasm = { 0 }; struct mcount_disasm_info info = { .sym = &sym, .addr = ORIGINAL_BASE + sym.addr, From 9a1c695bb8c2bb49a33898b6101d8fe5ad898267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 10:22:27 -0400 Subject: [PATCH 04/30] dynamic: Discard the pattern list after update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dynamic pattern list is not reused from a dynamic update to another, keeping libmcount stateless in that regard. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index abf3c1a7f..bccde007a 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -652,6 +652,8 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, patch_func_matched(mdi, map); } + release_pattern_list(); + if (stats.failed + stats.skipped + stats.nomatch == 0) { pr_dbg("patched all (%d) functions in '%s'\n", stats.total, basename(sinfo->filename)); From 38b823bd3f105ad23a6f416f5ced2ebc5601540f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 10:24:22 -0400 Subject: [PATCH 05/30] dynamic: Refactor 'mcount_dynamic_update' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'mcount_dynamic_update' is now safe to call multiple times, including at runtime. On each call, it will perform patching and unpatching of the target (not implemented yet). 
Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index bccde007a..8c705bb14 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -689,6 +689,14 @@ static int calc_percent(int n, int total, int *rem) return quot; } +/** + * mcount_dynamic_update - prepare and perform dynamic patching or unpatching, + * then display statistics + * @sinfo - dynamic symbol info + * @patch_funcs - spec of symbols to patch or unpatch + * @ptype - matching pattern type + * @return - 0 (unused) + */ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, enum uftrace_pattern_type ptype) { @@ -696,9 +704,7 @@ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, bool needs_modules = !!strchr(patch_funcs, '@'); mcount_disasm_init(&disasm); - prepare_dynamic_update(sinfo, needs_modules); - setup_size_filter(); ret = do_dynamic_update(sinfo, patch_funcs, ptype); From faefcfa5d8a7da9d9073c543e8b8214c0c5ab7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 7 Mar 2023 11:32:24 -0500 Subject: [PATCH 06/30] dynamic: Streamline trampoline life cycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Install a trampoline for each loaded module map, on initialization. Keep the trampolines in memory to allow for dynamic patching at runtime. Clear the trampolines on libmcount exit. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 78 +++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index 8c705bb14..d0e61516d 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -294,8 +294,15 @@ static struct mcount_dynamic_info *create_mdi(struct dl_phdr_info *info) return mdi; } -/* callback for dl_iterate_phdr() */ -static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) +/** + * prepare_dynamic_module - store dynamic module info and install trampoline; + * callback for dl_iterate_phdr() + * @info - module info + * @sz - data size (unused) + * @data - callback data: symbol info and module parsing flag + * @return - stop iteration on non-zero value + */ +static int prepare_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) { struct mcount_dynamic_info *mdi; struct find_module_data *fmd = data; @@ -315,6 +322,9 @@ static int find_dynamic_module(struct dl_phdr_info *info, size_t sz, void *data) mdi->map = map; mcount_arch_find_module(mdi, &map->mod->symtab); + if (mcount_setup_trampoline(mdi) < 0) + mdi->trampoline = 0; + mdi->next = mdinfo; mdinfo = mdi; pr_dbg3("load dynamic info for '%s'\n", mdi->map->libname); @@ -352,7 +362,7 @@ static void prepare_dynamic_update(struct uftrace_sym_info *sinfo, bool needs_mo else return; - dl_iterate_phdr(find_dynamic_module, &fmd); + dl_iterate_phdr(prepare_dynamic_module, &fmd); } /** @@ -629,10 +639,18 @@ static void patch_func_matched(struct mcount_dynamic_info *mdi, struct uftrace_m patch_normal_func_matched(mdi, map); } +/** + * do_dynamic_update - apply (un)patching across loaded modules' maps + * @sinfo - dynamic symbol info + * @patch_funcs - spec of symbols to (un)patch + * @ptype - matching pattern type + * @return - 0 (unused) + */ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, 
enum uftrace_pattern_type ptype) { struct uftrace_mmap *map; + struct mcount_dynamic_info *mdi; char *def_mod; if (patch_funcs == NULL) @@ -641,15 +659,11 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, def_mod = basename(sinfo->exec_map->libname); parse_pattern_list(patch_funcs, def_mod, ptype); - for_each_map(sinfo, map) { - struct mcount_dynamic_info *mdi; - - /* TODO: filter out unsuppported libs */ - mdi = setup_trampoline(map); - if (mdi == NULL) - continue; - - patch_func_matched(mdi, map); + /* TODO: filter out unsupported libs */ + for (mdi = mdinfo; mdi != NULL; mdi = mdi->next) { + map = mdi->map; + if (mdi->trampoline) + patch_func_matched(mdi, map); } release_pattern_list(); @@ -662,24 +676,6 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, return 0; } -static void freeze_dynamic_update(void) -{ - struct mcount_dynamic_info *mdi, *tmp; - - mdi = mdinfo; - while (mdi) { - tmp = mdi->next; - - mcount_arch_dynamic_recover(mdi, &disasm); - mcount_cleanup_trampoline(mdi); - free(mdi); - - mdi = tmp; - } - - mcount_freeze_code(); -} - /* do not use floating-point in libmcount */ static int calc_percent(int n, int total, int *rem) { @@ -724,7 +720,7 @@ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, pr_dbg("no match: %8d\n", stats.nomatch); } - freeze_dynamic_update(); + mcount_freeze_code(); return ret; } @@ -770,9 +766,29 @@ void mcount_dynamic_dlopen(struct uftrace_sym_info *sinfo, struct dl_phdr_info * mcount_freeze_code(); } +/** + * mcount_free_mdinfo - free all dynamic info structures + */ +static void mcount_free_mdinfo(void) +{ + struct mcount_dynamic_info *mdi, *tmp; + + mdi = mdinfo; + while (mdi) { + tmp = mdi->next; + + mcount_arch_dynamic_recover(mdi, &disasm); + mcount_cleanup_trampoline(mdi); + free(mdi); + + mdi = tmp; + } +} + void mcount_dynamic_finish(void) { release_pattern_list(); + mcount_free_mdinfo(); mcount_disasm_finish(&disasm); } From 
eb8e4df3e4b68e325134ea1e19b6c6110720cb41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Wed, 8 Mar 2023 14:45:32 -0500 Subject: [PATCH 07/30] dynamic: x86_64: Skip already patched functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip the functions where uftrace already injected a call to a trampoline. Signed-off-by: Clément Guidi --- arch/x86_64/mcount-insn.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86_64/mcount-insn.c b/arch/x86_64/mcount-insn.c index 740216d99..5e0e1bfa0 100644 --- a/arch/x86_64/mcount-insn.c +++ b/arch/x86_64/mcount-insn.c @@ -578,8 +578,11 @@ int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynami cs_insn *insn = NULL; uint32_t count, i, size; uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; + void *trampoline_addr; + uint8_t *operand; struct dynamic_bad_symbol *badsym; unsigned long addr = info->addr; + bool is_call, is_trampoline; badsym = mcount_find_badsym(mdi, info->addr); if (badsym != NULL) { @@ -611,6 +614,20 @@ int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynami if (count == 0) return INSTRUMENT_FAILED; + /* Check for pre-existing dynamic instrumentation + 1. check if opcode is call + 2. 
check operand is trampoline address */ + operand = &((uint8_t *)info->addr)[1]; + trampoline_addr = (void *)mdi->trampoline - (info->addr + CALL_INSN_SIZE); + is_call = ((uint8_t *)info->addr)[0] == 0xe8; + if (is_call) { + is_trampoline = !memcmp(operand, &trampoline_addr, CALL_INSN_SIZE - 1); + if (is_trampoline) { + pr_dbg2("skip dynamically patched func: %s\n", info->sym->name); + return INSTRUMENT_SKIPPED; + } + } + for (i = 0; i < count; i++) { uint8_t insns_byte[32] = { 0, From 03052c9ba15c904f41f67e70053b5396e766a018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Fri, 10 Mar 2023 11:22:00 -0500 Subject: [PATCH 08/30] uftrace: Don't patch all functions by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally, when the user asks to only unpatch functions, uftrace would patch all other functions by default. This is counter-intuitive, especially when using the agent to unpatch at runtime. The user doesn't expect functions to be patched when they only unpatch funtions. This commit removes this behavior, and requires the user to explicitly define functions to patch. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index d0e61516d..ac5b20b36 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -455,7 +455,6 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa char *name; int j; struct patt_list *pl; - bool all_negative = true; strv_split(&funcs, patch_funcs, ";"); @@ -466,10 +465,8 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa if (name[0] == '!') name++; - else { + else pl->positive = true; - all_negative = false; - } delim = strchr(name, '@'); if (delim == NULL) { @@ -484,20 +481,6 @@ static void parse_pattern_list(char *patch_funcs, char *def_mod, enum uftrace_pa list_add_tail(&pl->list, &patterns); } - /* prepend match-all pattern, if all patterns are negative */ - if (all_negative) { - pl = xzalloc(sizeof(*pl)); - pl->positive = true; - pl->module = xstrdup(def_mod); - - if (ptype == PATT_REGEX) - init_filter_pattern(ptype, &pl->patt, "."); - else - init_filter_pattern(PATT_GLOB, &pl->patt, "*"); - - list_add(&pl->list, &patterns); - } - strv_free(&funcs); } From 417dfd083f69e0a8cee49ee6faeca2840f70025e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 21 Mar 2023 10:18:20 -0400 Subject: [PATCH 09/30] mcount: Don't unpatch unmatched functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Libmcount would try to unpatch by default any function that is not matched by the user patch or unpatch options. We remove this implicit behavior, so the user explicitly choses which function to unpatch. 
Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index ac5b20b36..9182013e0 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -515,11 +515,8 @@ static bool skip_sym(struct uftrace_symbol *sym, struct mcount_dynamic_info *mdi if (sym->type != ST_LOCAL_FUNC && sym->type != ST_GLOBAL_FUNC && sym->type != ST_WEAK_FUNC) return true; - if (!match_pattern_list(map, soname, sym->name)) { - if (mcount_unpatch_func(mdi, sym, &disasm) == 0) - stats.unpatch++; + if (!match_pattern_list(map, soname, sym->name)) return true; - } return false; } From ad018309565d98759bb85732e8f800dcd092ce7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 15:10:21 -0400 Subject: [PATCH 10/30] dynamic: Refactor 'match_pattern_list' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return whether a symbol is positively or negatively matched against a pattern list, or not matched at all. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index 9182013e0..bbda3b0de 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -429,10 +429,17 @@ static bool match_pattern_module(char *pathname) return ret; } -static bool match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym_name) +/** + * match_pattern_list - match a symbol name against a pattern list + * @map - memory map of the symbol + * @soname - name of the module + * @sym_name - name of the symbol + * @return - -1 if match negative, 1 if match positive, 0 if no match + */ +static int match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym_name) { struct patt_list *pl; - bool ret = false; + int ret = 0; char *libname = basename(map->libname); list_for_each_entry(pl, &patterns, list) { @@ -443,7 +450,7 @@ static bool match_pattern_list(struct uftrace_mmap *map, char *soname, char *sym continue; if (match_filter_pattern(&pl->patt, sym_name)) - ret = pl->positive; + ret = pl->positive ? 
1 : -1; } return ret; @@ -515,9 +522,6 @@ static bool skip_sym(struct uftrace_symbol *sym, struct mcount_dynamic_info *mdi if (sym->type != ST_LOCAL_FUNC && sym->type != ST_GLOBAL_FUNC && sym->type != ST_WEAK_FUNC) return true; - if (!match_pattern_list(map, soname, sym->name)) - return true; - return false; } @@ -584,6 +588,7 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf unsigned i; struct uftrace_symbol *sym; bool found = false; + int match; char *soname = get_soname(map->libname); symtab = &map->mod->symtab; @@ -593,9 +598,15 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf if (skip_sym(sym, mdi, map, soname)) continue; - found = true; - mcount_patch_func_with_stats(mdi, sym); + + match = match_pattern_list(map, soname, sym->name); + if (!match) + continue; + else if (match == 1) + mcount_patch_func_with_stats(mdi, sym); + else + mcount_unpatch_func(mdi, sym, NULL); } if (!found) @@ -882,27 +893,27 @@ TEST_CASE(dynamic_pattern_list) pr_dbg("check simple match with default module\n"); parse_pattern_list("abc;!def", "main", PATT_SIMPLE); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), true); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), false); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), false); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), 1); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), -1); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 0); release_pattern_list(); pr_dbg("check negative regex match with default module\n"); parse_pattern_list("!^a", "main", PATT_REGEX); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), false); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), true); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), false); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), -1); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), 0); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 0); release_pattern_list(); 
pr_dbg("check wildcard match with other module\n"); parse_pattern_list("*@other", "main", PATT_GLOB); - TEST_EQ(match_pattern_list(main_map, NULL, "abc"), false); - TEST_EQ(match_pattern_list(main_map, NULL, "def"), false); - TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), true); + TEST_EQ(match_pattern_list(main_map, NULL, "abc"), 0); + TEST_EQ(match_pattern_list(main_map, NULL, "def"), 0); + TEST_EQ(match_pattern_list(other_map, NULL, "xyz"), 1); release_pattern_list(); From 8725dbdb50a44f63bdf456039bbbaa3221b1a335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Fri, 10 Mar 2023 10:03:17 -0500 Subject: [PATCH 11/30] configure: Check for membarrier support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runtime dynamic patching requires cache synchronization. This is best achieved by using the 'MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE' memory barrier. However it was introduced in Linux 4.16. We set a flag indicating whether this membarrier can be used or if libmcount has to rely on other mechanisms (e.g. signals). 
Signed-off-by: Clément Guidi --- check-deps/Makefile | 1 + check-deps/Makefile.check | 4 ++++ check-deps/__have_membarrier.c | 10 ++++++++++ configure | 3 +++ 4 files changed, 18 insertions(+) create mode 100644 check-deps/__have_membarrier.c diff --git a/check-deps/Makefile b/check-deps/Makefile index 2ca41b3ee..8f06dfae4 100644 --- a/check-deps/Makefile +++ b/check-deps/Makefile @@ -14,6 +14,7 @@ CHECK_LIST += have_libdw CHECK_LIST += have_libunwind CHECK_LIST += have_libcapstone CHECK_LIST += cc_has_minline_all_stringops +CHECK_LIST += have_membarrier # # This is needed for checking build dependency diff --git a/check-deps/Makefile.check b/check-deps/Makefile.check index 93443e145..85042658d 100644 --- a/check-deps/Makefile.check +++ b/check-deps/Makefile.check @@ -82,3 +82,7 @@ endif ifneq ($(wildcard $(objdir)/check-deps/cc_has_minline_all_stringops),) LIB_CFLAGS += -minline-all-stringops endif + +ifneq ($(wildcard $(objdir)/check-deps/have_membarrier),) + LIB_CFLAGS += -DHAVE_MEMBARRIER +endif diff --git a/check-deps/__have_membarrier.c b/check-deps/__have_membarrier.c new file mode 100644 index 000000000..30a7c5a92 --- /dev/null +++ b/check-deps/__have_membarrier.c @@ -0,0 +1,10 @@ +#include +#include +#include + +int main() +{ + syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + return 0; +} diff --git a/configure b/configure index c90aab2ad..ead8571a8 100755 --- a/configure +++ b/configure @@ -35,6 +35,7 @@ usage() { --without-capstone build without libcapstone (even if found on the system) --without-perf build without perf event (even if available) --without-schedule build without scheduler event (even if available) + --without-membarrier build without membarriers (even if available) --arch= set target architecture (default: system default arch) e.g. 
x86_64, aarch64, i386, or arm @@ -198,6 +199,7 @@ for dep in $IGNORE; do capstone) TARGET=have_libcapstone ;; perf*) TARGET=perf_clockid ;; sched*) TARGET=perf_context_switch;; + membarrier) TARGET=have_membarrier ;; *) ;; esac if [ ! -z "$TARGET" ]; then @@ -265,6 +267,7 @@ print_feature "perf_event" "perf_clockid" "perf (PMU) event support" print_feature "schedule" "perf_context_switch" "scheduler event support" print_feature "capstone" "have_libcapstone" "full dynamic tracing support" print_feature "libunwind" "have_libunwind" "stacktrace support (optional for debugging)" +print_feature "membarrier" "have_membarrier" "membarrier support (requires Linux 4.16.0)" cat >$output < Date: Mon, 3 Apr 2023 12:10:55 -0400 Subject: [PATCH 12/30] utils: Implement syscall wrappers for glibc < 2.30 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glibc < 2.30 doen't provide wrappers for 'gettid()' and 'tgkill()' so we define them. Signed-off-by: Clément Guidi --- utils/utils.c | 26 ++++++++++++++++++++++++++ utils/utils.h | 3 +++ 2 files changed, 29 insertions(+) diff --git a/utils/utils.c b/utils/utils.c index 592b3df40..b3ec545f0 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef HAVE_LIBUNWIND @@ -1001,6 +1002,31 @@ int copy_file(const char *path_in, const char *path_out) return 0; } +#ifndef gettid +/** + * gettid - gettid syscall wrapper (glibc < 2.30) + * @return - thread id + */ +pid_t gettid() +{ + return syscall(__NR_gettid); +} +#endif + +#ifndef tgkill +/** + * tgkill - tgkill syscall wrapper (glibc < 2.30) + * @tgid - thread group id + * @tid - thread id + * @signal - signal to send + * @return - 0 on success, -1 on error + */ +int tgkill(pid_t tgid, pid_t tid, int signal) +{ + return syscall(SYS_tgkill, tgid, tid, signal); +} +#endif + #ifdef UNIT_TEST TEST_CASE(utils_parse_cmdline) { diff --git a/utils/utils.h b/utils/utils.h index fb0d8c4ed..55e898527 
100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -429,4 +429,7 @@ void stacktrace(void); int copy_file(const char *path_in, const char *path_out); +pid_t gettid(); +int tgkill(pid_t tgid, pid_t tid, int signal); + #endif /* UFTRACE_UTILS_H */ From 25a8ceaac3dcf1eea2ad924deb11885cb29b5427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 09:30:03 -0400 Subject: [PATCH 13/30] utils: Define signal related functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions to setup real-time signals and broadcast signals to all threads in an application. Useful for runtime synchronization mechanisms. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- utils/utils.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++ utils/utils.h | 3 ++ 2 files changed, 93 insertions(+) diff --git a/utils/utils.c b/utils/utils.c index b3ec545f0..e85de181f 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -1027,6 +1027,96 @@ int tgkill(pid_t tgid, pid_t tid, int signal) } #endif +/** + * find_unused_sigrt - find a real-time signal with no associated action + * @return - unused RT signal, or -1 if none found + */ +int find_unused_sigrt() +{ + int sig; + struct sigaction oldact; + + for (sig = SIGRTMIN; sig <= SIGRTMAX; sig++) { + if (sigaction(sig, NULL, &oldact) < 0) { + pr_dbg3("failed to check RT signal handler\n"); + continue; + } + + if (!oldact.sa_handler) + return sig; + } + + pr_dbg2("failed to find unused SIGRT\n"); + return -1; +} + +/** + * thread_broadcast_signal - send a signal to all the other running threads in + * the process + * @sig - signal to send + * @return - number of signals sent, -1 on error + */ +int thread_broadcast_signal(int sig) +{ + char path[32]; + DIR *dir; + struct dirent *dirent; + pid_t pid, tid, task; + int signal_count = 0; + + pid = getpid(); + tid = gettid(); + + snprintf(path, 32, "/proc/%u/task", pid); + dir = opendir(path); + if (!dir) 
{ + pr_dbg("failed to open directory '%s'\n", path); + goto fail_open_dir; + } + + errno = 0; + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + /* skip "." and ".." directories */ + if (dirent->d_name[0] == '.') + continue; + + task = strtol(dirent->d_name, NULL, 10); + if (errno != 0 || task < 0) { + pr_dbg("failed to parse TID '%s'\n", dirent->d_name); + continue; + } + + /* ignore our TID */ + if (task == tid) + continue; + + /* + * By reading /proc//task directory, there is the possibility of + * a race condition where a thread exits before we send the signal. + */ + pr_dbg4("send SIGRT%d to %u\n", sig, task); + if (tgkill(pid, task, sig) < 0) { + if (errno != ESRCH) { + pr_dbg2("cannot signal thread %u: %s\n", task, strerror(errno)); + errno = 0; + } + } + else + signal_count++; + } + + if (errno != 0) + pr_dbg("failed to read directory entry\n"); + + if (closedir(dir) < 0) + pr_dbg2("failed to close directory\n"); + + return signal_count; + +fail_open_dir: + return -1; +} + #ifdef UNIT_TEST TEST_CASE(utils_parse_cmdline) { diff --git a/utils/utils.h b/utils/utils.h index 55e898527..3d11a9f5f 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -432,4 +432,7 @@ int copy_file(const char *path_in, const char *path_out); pid_t gettid(); int tgkill(pid_t tgid, pid_t tid, int signal); +int find_unused_sigrt(); +int thread_broadcast_signal(int sig); + #endif /* UFTRACE_UTILS_H */ From 45fc1f63da548abce01efd8eeaad765daa9d880f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 09:34:13 -0400 Subject: [PATCH 14/30] mcount: Init arch-specific dynamic structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trigger architecture specific dynamic initialization when initializing the dynamic instrumentation mechanics. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index bbda3b0de..0fbbedc4b 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -60,6 +60,8 @@ static unsigned min_size; /* disassembly engine for dynamic code patch (for capstone) */ static struct mcount_disasm_engine disasm; +static bool mcount_dynamic_is_initialized; + static struct mcount_orig_insn *create_code(struct Hashmap *map, unsigned long addr) { struct mcount_orig_insn *entry; @@ -247,6 +249,11 @@ __weak void mcount_disasm_finish(struct mcount_disasm_engine *disasm) { } +__weak int mcount_arch_dynamic_init(void) +{ + return -1; +} + __weak int mcount_arch_branch_table_size(struct mcount_disasm_info *info) { return 0; @@ -652,6 +659,13 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, /* TODO: filter out unsupported libs */ for (mdi = mdinfo; mdi != NULL; mdi = mdi->next) { + if (mdi->type == DYNAMIC_NONE && !mcount_dynamic_is_initialized) { + if (mcount_arch_dynamic_init() >= 0) + mcount_dynamic_is_initialized = true; + else + continue; + } + map = mdi->map; if (mdi->trampoline) patch_func_matched(mdi, map); From ddf642008817ed493ad48338cc0a37b2ee1371e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 09:42:10 -0400 Subject: [PATCH 15/30] x86_64: dynamic: Refactor 'get_target_address' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor for more clarity. 
Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index a14642287..268855247 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -247,9 +247,15 @@ void mcount_arch_find_module(struct mcount_dynamic_info *mdi, struct uftrace_sym elf_finish(&elf); } -static unsigned long get_target_addr(struct mcount_dynamic_info *mdi, unsigned long addr) +/** + * get_trampoline_offset - compute the relative address of the trampoline + * @mdi - mcount dynamic info + * @origin - origin address + * @return - distance to the trampoline + */ +static unsigned long get_trampoline_offset(struct mcount_dynamic_info *mdi, unsigned long origin) { - return mdi->trampoline - (addr + CALL_INSN_SIZE); + return mdi->trampoline - (origin + CALL_INSN_SIZE); } static int patch_fentry_code(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) @@ -271,7 +277,7 @@ static int patch_fentry_code(struct mcount_dynamic_info *mdi, struct uftrace_sym } /* get the jump offset to the trampoline */ - target_addr = get_target_addr(mdi, (unsigned long)insn); + target_addr = get_trampoline_offset(mdi, (unsigned long)insn); if (target_addr == 0) return INSTRUMENT_SKIPPED; @@ -429,14 +435,14 @@ static void patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_inf { void *origin_code_addr; unsigned char call_insn[] = { 0xe8, 0x00, 0x00, 0x00, 0x00 }; - uint32_t target_addr = get_target_addr(mdi, info->addr); + uint32_t target_addr = get_trampoline_offset(mdi, info->addr); /* patch address */ origin_code_addr = (void *)info->addr; if (info->has_intel_cet) { origin_code_addr += ENDBR_INSN_SIZE; - target_addr = get_target_addr(mdi, info->addr + ENDBR_INSN_SIZE); + target_addr = get_trampoline_offset(mdi, info->addr + ENDBR_INSN_SIZE); } /* build the instrumentation instruction */ From 1a572ae1501c8fe2eca8753609fcef9c8c84c8a2
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 09:47:13 -0400 Subject: [PATCH 16/30] x86_64: dynamic: Refactor 'patch_code' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor 'patch_code' so it can later be used at runtime. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 119 +++++++++++++---------------------- 1 file changed, 45 insertions(+), 74 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 268855247..2047de025 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -386,93 +386,66 @@ static int patch_xray_func(struct mcount_dynamic_info *mdi, struct uftrace_symbo return ret; } -/* - * we overwrite instructions over 5bytes from start of function - * to call '__dentry__' that seems similar like '__fentry__'. - * - * while overwriting, After adding the generated instruction which - * returns to the address of the original instruction end, - * save it in the heap. - * - * for example: - * - * 4005f0: 31 ed xor %ebp,%ebp - * 4005f2: 49 89 d1 mov %rdx,%r9 - * 4005f5: 5e pop %rsi - * - * will changed like this : - * - * 4005f0 call qword ptr [rip + 0x200a0a] # 0x601000 - * - * and keeping original instruction : - * - * Original Instructions--------------- - * f1cff0: xor ebp, ebp - * f1cff2: mov r9, rdx - * f1cff5: pop rsi - * Generated Instruction to return----- - * f1cff6: jmp qword ptr [rip] - * f1cffc: QW 0x00000000004005f6 - * - * In the original case, address 0x601000 has a dynamic symbol - * start address. It is also the first element in the GOT array. - * while initializing the mcount library, we will replace it with - * the address of the function '__dentry__'. so, the changed - * instruction will be calling '__dentry__'. - * - * '__dentry__' has a similar function like '__fentry__'. 
- * the other thing is that it returns to original instructions - * we keeping. it makes it possible to execute the original - * instructions and return to the address at the end of the original - * instructions. Thus, the execution will goes on. - * - */ - -/* - * Patch the instruction to the address as given for arguments. +/** + * patch_code - inject a call to an instrumentation trampoline + * @mdi - dynamic info about the concerned module + * @info - disassembly info (instructions address and size) + * @return - instrumentation status */ -static void patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) +static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) { + /* + * Let's assume we have the following instructions. + * + * 0x0: push %rbp <= origin_code_addr + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + * + * The goal is to modify the instructions in order to get the following + * ones. + * + * 0x0: call + * 0x5: + * 0xb: + */ + void *origin_code_addr; - unsigned char call_insn[] = { 0xe8, 0x00, 0x00, 0x00, 0x00 }; - uint32_t target_addr = get_trampoline_offset(mdi, info->addr); + void *trampoline_rel_addr; - /* patch address */ origin_code_addr = (void *)info->addr; - - if (info->has_intel_cet) { + if (info->has_intel_cet) origin_code_addr += ENDBR_INSN_SIZE; - target_addr = get_trampoline_offset(mdi, info->addr + ENDBR_INSN_SIZE); - } - /* build the instrumentation instruction */ - memcpy(&call_insn[1], &target_addr, CALL_INSN_SIZE - 1); + trampoline_rel_addr = (void *)get_trampoline_offset(mdi, (unsigned long)origin_code_addr); /* - * we need 5-bytes at least to instrumentation. however, - * if instructions is not fit 5-bytes, we will overwrite the - * 5-bytes and fill the remaining part of the last - * instruction with nop. * - * [example] - * In this example, we overwrite 9-bytes to use 5-bytes. 
* - * dynamic: 0x19e98b0[01]:push rbp - * dynamic: 0x19e98b1[03]:mov rbp, rsp - * dynamic: 0x19e98b4[05]:mov edi, 0x4005f4 * - * dynamic: 0x40054c[05]:call 0x400ff0 - * dynamic: 0x400551[01]:nop - * dynamic: 0x400552[01]:nop - * dynamic: 0x400553[01]:nop - * dynamic: 0x400554[01]:nop */ - memcpy(origin_code_addr, call_insn, CALL_INSN_SIZE); + /* + * We fill the remaining part of the patching region with nops. + * + * 0x0: int3 + * 0x1: + * 0x5: + * 0xb: + */ + + memcpy(&((uint8_t *)origin_code_addr)[1], &trampoline_rel_addr, CALL_INSN_SIZE - 1); memset(origin_code_addr + CALL_INSN_SIZE, 0x90, /* NOP */ info->orig_size - CALL_INSN_SIZE); - /* flush icache so that cpu can execute the new insn */ - __builtin___clear_cache(origin_code_addr, origin_code_addr + info->orig_size); + /* + * 0x0: call + * 0x5: + * 0xb: + */ + + ((uint8_t *)origin_code_addr)[0] = 0xe8; + + return INSTRUMENT_SUCCESS; } static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, @@ -522,9 +495,7 @@ static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_sym else mcount_save_code(&info, call_offset, jmp_insn, sizeof(jmp_insn)); - patch_code(mdi, &info); - - return INSTRUMENT_SUCCESS; + return patch_code(mdi, &info); } static int unpatch_func(uint8_t *insn, char *name) From 52a01131c28fdc29e261ecdd8d0ee0d8d11ae07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 09:55:11 -0400 Subject: [PATCH 17/30] x86_64: dynamic: Protect patch zone with trap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When patching a function at runtime, first insert an int3 trap so any incoming thread is diverted from the patching region, to avoid executing partially modified code. The trap handler emulates a call to the trampoline, thus enabling the instrumentation. The trap is eventually removed in subsequent commits. 
Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 136 ++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 2047de025..19dedd82e 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -9,6 +9,7 @@ #include "libmcount/dynamic.h" #include "libmcount/internal.h" #include "mcount-arch.h" +#include "utils/hashmap.h" #include "utils/symbol.h" #include "utils/utils.h" @@ -19,6 +20,125 @@ static const unsigned char patchable_clang_nop[] = { 0x0f, 0x1f, 0x44, 0x00, 0x0 static const unsigned char endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; +/* + * Hashmap of trampoline locations for every installed int3 trap. The traps are + * used when patching and unpatching functions: they prevent threads from + * entering the critical zone. When a trap is reached, the trap handler fetches + * the trampoline location from the hmap, and emulates a call to the trampoline, + * to mimic regular instrumentation. + * + * (void *int3_location -> void *trampoline) + */ +static struct Hashmap *int3_hmap; + +/** + * register_trap - save trampoline associated to a trap + * @trap - trap address + * @call_loc - address of the symbol to emulate a call to + * @return - -1 on error, 0 on success + */ +static int register_trap(void *trap, void *call_loc) +{ + if (!hashmap_put(int3_hmap, trap, call_loc)) + return -1; + + return 0; +} + +/** + * unregister_trap - remove trap entry from hmap + * @trap - trap address + * @return - -1 on error, 0 on success + */ +static int unregister_trap(void *trap) +{ + if (hashmap_remove(int3_hmap, trap)) + return 0; + + return -1; +} + +/** + * emulate_trampoline_call - SIGTRAP handler, emulates a call to the trampoline + * associated with the trap location + * + * When the trap handler is executed, it changes the program counter to point to + * . 
When the trap handler exits, the code at will + * execute (which is __dentry__ defined in dynamic.S). + * + * As __dentry__ is expected to be called like a function, it depends on the + * address of the caller to know which tracepoint was executed. The address is + * expected to be found on the stack. Therefore, the trap handler actually needs + * to emulate a call instruction entirely (moving the instruction pointer is not + * enough). + * + * To do so, the trap handler will also push on the stack the next instruction + * pointer that would be used if the executed instruction was a call instead of + * a trap. + * + * @sig - signal caught + * @info - (unused) + * @ucontext - user context, containing registers + */ +static void emulate_trampoline_call(int sig, siginfo_t *info, void *ucontext) +{ + ucontext_t *uctx = ucontext; + mcontext_t *mctx = &uctx->uc_mcontext; + void *int3_addr; + unsigned long trampoline; + unsigned long child_addr; /* probe location for mcount_entry */ + + ASSERT(sig == SIGTRAP); + + __atomic_signal_fence(__ATOMIC_SEQ_CST); + int3_addr = (void *)mctx->gregs[REG_RIP] - 1; /* int3 size = 1 */ + trampoline = (unsigned long)hashmap_get(int3_hmap, int3_addr); + + child_addr = (unsigned long)int3_addr + CALL_INSN_SIZE; + + mctx->gregs[REG_RSP] -= 8; + memcpy((void *)mctx->gregs[REG_RSP], &child_addr, 8); + mctx->gregs[REG_RIP] = trampoline; +} + +/** + * configure_sigtrap_handler - emulate call to trampoline on SIGTRAP + * @return - -1 on failure, 0 on success + */ +static int configure_sigtrap_handler() +{ + struct sigaction act; + + act.sa_sigaction = emulate_trampoline_call; + act.sa_flags = SA_SIGINFO; + + if (sigaction(SIGTRAP, &act, NULL) < 0) { + pr_err("failed to configure SIGTRAP handler\n"); + return -1; + } + + pr_dbg2("configured SIGTRAP handler\n"); + return 0; +} + +/** + * mcount_arch_dynamic_init - initialize arch-specific data structures to + * perform runtime dynamic instrumentation + */ +int mcount_arch_dynamic_init() +{ + if 
(!int3_hmap) { + int3_hmap = hashmap_create(4, hashmap_ptr_hash, hashmap_ptr_equals); + if (!int3_hmap) { + pr_dbg("failed to create int3 hashmap\n"); + return -1; + } + } + if (configure_sigtrap_handler() < 0) + return -1; + + return 0; +} int mcount_setup_trampoline(struct mcount_dynamic_info *mdi) { unsigned char trampoline[] = { 0x3e, 0xff, 0x25, 0x01, 0x00, 0x00, 0x00, 0xcc }; @@ -419,11 +539,22 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info trampoline_rel_addr = (void *)get_trampoline_offset(mdi, (unsigned long)origin_code_addr); - /* - * + /* The first step is to insert a 1-byte trap-based probe point (atomic). + * This will prevent threads to enter the critical zone while we patch it, + * so no core will see partial modifications. * + * 0x0: int3 <= origin_code_addr + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: * + * The trap will emulate a call to the trampoline while in place. */ + + if (register_trap(origin_code_addr, (void *)mdi->trampoline) == -1) + return INSTRUMENT_FAILED; + ((uint8_t *)origin_code_addr)[0] = 0xcc; + /* * We fill the remaining part of the patching region with nops. * @@ -444,6 +575,7 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info */ ((uint8_t *)origin_code_addr)[0] = 0xe8; + unregister_trap(origin_code_addr); return INSTRUMENT_SUCCESS; } From 55254726680862ec1f756e316e72f774bb4aa090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 10:02:53 -0400 Subject: [PATCH 18/30] x86_64: dynamic: Synchronize cores when patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross-modifying code, the software needs to ensure that all cores will execute valid instructions at any time. When modifications are not atomic, we issue a specific memory barrier (or execute a serializing instruction on Linux < 4.16) to serialize the execution across all cores. 
This flushes the different caches, especially the processor pipelines that may have partially fetched straddling instructions. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 131 +++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 19dedd82e..f91cbc9e8 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -1,7 +1,15 @@ +#include #include #include #include +#if HAVE_MEMBARRIER +#include +#else +#include +#include +#endif + /* This should be defined before #include "utils.h" */ #define PR_FMT "dynamic" #define PR_DOMAIN DBG_DYNAMIC @@ -121,6 +129,120 @@ static int configure_sigtrap_handler() return 0; } +#if HAVE_MEMBARRIER + +/** + * setup_synchronization_mechanism - register intent to use the 'private + * expedited sync core' membarrier to synchronize instruction pipelines and + * caches across cores, for safe cross-modification. 
+ * @return - negative on error, 0 on success + */ +static int setup_synchronization_mechanism() +{ + int ret = + syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + if (ret < 0) + pr_dbg2("failed to register membarrier intent: %s\n", strerror(errno)); + return ret; +} + +/** + * synchronize_all_cores - use membarrier to perform cache and pipeline + * synchronization across cores executing cross-modified code + * @return - negative on error, 0 on success + */ +static int synchronize_all_cores() +{ + int ret = syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0); + if (ret < 0) + pr_dbg2("failed to use membarrier: %s\n", strerror(errno)); + return ret; +} + +#else /* HAVE_MEMBARRIER */ + +/* signal used to perform cache and pipeline synchronization across cores */ +static int sig_sync_cores; + +/* counter for the threads that have performed serialization when a signal is + issued */ +static sem_t sem_sync_cores; + +/** + * serialize_instruction_execution - execute core serialize instruction + * + * According to Intel manual, CPUID is a serializing instruction. 
+ */ +static void serialize_instruction_execution() +{ + int _; + __cpuid(_, _, _, _, _); + sem_post(&sem_sync_cores); +} + +/** + * setup_synchronization_mechanism - setup real-time signal and its handler to + * perform core synchronization across all threads + * @return - 0 on success, -1 on failure + */ +static int setup_synchronization_mechanism() +{ + struct sigaction act; + + if (sig_sync_cores > 0) + return 0; + + sig_sync_cores = find_unused_sigrt(); + if (sig_sync_cores == -1) + return -1; + + sem_init(&sem_sync_cores, 0, 0); + + act.sa_sigaction = serialize_instruction_execution; + act.sa_flags = 0; + + if (sigaction(sig_sync_cores, &act, NULL) < 0) { + pr_dbg("failed to configure core synchronization signal handler\n"); + return -1; + } + + pr_dbg("configured core synchronization signal (SIGRT%d) handler\n", sig_sync_cores); + return 0; +} + +/** + * synchronize_all_cores - send RT signals to perform cache and pipeline + * synchronization across cores executing cross-modified code.
+ * @return - -1 on error, 0 on success + */ +static int synchronize_all_cores() +{ + int signal_count; + int sync_count = 0; + struct timespec ts; + + ASSERT(sig_sync_cores >= SIGRTMIN); + + signal_count = thread_broadcast_signal(sig_sync_cores); + + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + return -1; + ts.tv_sec += 1; + + for (int i = 0; i < signal_count; i++) { + if (sem_timedwait(&sem_sync_cores, &ts) == -1) { + if (errno == EINTR) + i--; + } + else + sync_count++; + } + pr_dbg3("synced core in %d/%d thread(s)\n", sync_count, signal_count); + + return 0; +} + +#endif /* HAVE_MEMBARRIER */ /** * mcount_arch_dynamic_init - initialize arch-specific data structures to * perform runtime dynamic instrumentation @@ -137,8 +259,12 @@ int mcount_arch_dynamic_init() if (configure_sigtrap_handler() < 0) return -1; + if (setup_synchronization_mechanism() < 0) + return -1; + return 0; } + int mcount_setup_trampoline(struct mcount_dynamic_info *mdi) { unsigned char trampoline[] = { 0x3e, 0xff, 0x25, 0x01, 0x00, 0x00, 0x00, 0xcc }; @@ -549,12 +675,17 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info * 0xb: * * The trap will emulate a call to the trampoline while in place. + * + * We ensure that every core sees the trap before patching the critical + * zone, by synchronizing the them. */ if (register_trap(origin_code_addr, (void *)mdi->trampoline) == -1) return INSTRUMENT_FAILED; ((uint8_t *)origin_code_addr)[0] = 0xcc; + synchronize_all_cores(); + /* * We fill the remaining part of the patching region with nops. * From c8070e6369a7e83421797aff7808edbe06185215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 10:12:38 -0400 Subject: [PATCH 19/30] x86_64: dynamic: Move threads out of patching zone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When patching at runtime, no thread can enter the patching region due to the trap that is inserted at the start of it. 
But threads that entered the region before the trap is installed can still be executing instructions in the region. We broadcast a real-time signal instructing all threads to check their instruction pointer, and execute out-of-line if they are in the patching region. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 176 ++++++++++++++++++++++++++++++++++- libmcount/dynamic.c | 1 + 2 files changed, 175 insertions(+), 2 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index f91cbc9e8..c785dac55 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,7 +8,6 @@ #include #else #include -#include #endif /* This should be defined before #include "utils.h" */ @@ -39,6 +39,22 @@ static const unsigned char endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; */ static struct Hashmap *int3_hmap; +/* Hashmap of the addresses of instructions that are in patching zones and need + * to be executed out of line. The addresses are mapped to their out of line + * equivalent.
+ * + * (void *critical_insn -> void *out_of_line_insn) + */ +static struct Hashmap *patch_region_hmap; + +/* Realtime signal number to instruct running threads to move out of patching + regions */ +static int sig_clear_patch_region; + +/* counter for the threads that are guaranteed to be out of registered patching + regions when a signal is issued */ +static sem_t sem_clear_patch_region; + /** * register_trap - save trampoline associated to a trap * @trap - trap address @@ -233,6 +249,8 @@ static int synchronize_all_cores() if (sem_timedwait(&sem_sync_cores, &ts) == -1) { if (errno == EINTR) i--; + else + pr_dbg3("error syncing with signal handler: %s\n", strerror(errno)); } else sync_count++; @@ -243,6 +261,143 @@ static int synchronize_all_cores() } #endif /* HAVE_MEMBARRIER */ + +/** + * register_patch_region - mark a memory region as critical by registering the + * addresses that it contains in 'patch_region_hmap' + * @start - address of the patch region + * @len - length of the patch region + * @return - -1 on error, 0 on success + */ +static int register_patch_region(void *start, int len) +{ + void *out_of_line_buffer = mcount_find_code((unsigned long)start + CALL_INSN_SIZE); + if (!out_of_line_buffer) + return -1; + + for (int i = 0; i < len; i++) { + if (!hashmap_put(patch_region_hmap, start + i, out_of_line_buffer + i)) + return -1; + } + + return 0; +} + +/** + * unregister_patch_region - unmark a memory region as critical + * @start - address of the patch region + * @len - length of the patch region + * @return - -1 on error, 0 on success + */ +static int unregister_patch_region(void *start, int len) +{ + void *out_of_line_buffer = mcount_find_code((unsigned long)start); + if (!out_of_line_buffer) + return -1; + + for (int i = 0; i < len; i++) { + if (!hashmap_remove(patch_region_hmap, start + i)) + return -1; + } + + return 0; +} + +/** + * leave_patch_region - signal handler on which a thread executes out of line if + * it happens to be in a 
registered patching region + * @sig - signal number + * @info - signal info (unused) + * @ucontext - user context + */ +static void leave_patch_region(int sig, siginfo_t *info, void *ucontext) +{ + ucontext_t *uctx = ucontext; + mcontext_t *mctx = &uctx->uc_mcontext; + void *next_insn; + void *out_of_line_insn; + (void)sig; + + next_insn = (void *)mctx->gregs[REG_RIP]; + out_of_line_insn = hashmap_get(patch_region_hmap, next_insn); + if (out_of_line_insn) + mctx->gregs[REG_RIP] = (uint64_t)out_of_line_insn; + + sem_post(&sem_clear_patch_region); +} + +/** + * clear_patch_region - move threads that are in a patching region out of line + * @return - 0 + */ +static int clear_patch_region() +{ + int signal_count; + int move_count = 0; + struct timespec ts; + + ASSERT(sig_clear_patch_region >= SIGRTMIN); + + signal_count = thread_broadcast_signal(sig_clear_patch_region); + + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + return -1; + ts.tv_sec += 1; + + for (int i = 0; i < signal_count; i++) { + if (sem_timedwait(&sem_clear_patch_region, &ts) == -1) { + if (errno == EINTR) + i--; + else + pr_dbg3("error syncing with signal handler: %s\n", strerror(errno)); + } + else + move_count++; + } + pr_dbg3("checked ip of %d/%d thread(s)\n", move_count, signal_count); + + return 0; +} + +/** + * setup_clear_patch_region - initialize data structures and signals used to + * move threads of patching regions + * @return - -1 on error, 0 on success + */ +int setup_clear_patch_region() +{ + struct sigaction act; + + if (!patch_region_hmap) { + patch_region_hmap = hashmap_create(4, hashmap_ptr_hash, hashmap_ptr_equals); + if (!patch_region_hmap) { + pr_dbg("failed to create patch region hashmap\n"); + return -1; + } + } + + if (sig_clear_patch_region > 0) + return 0; + + sig_clear_patch_region = find_unused_sigrt(); + if (sig_clear_patch_region == -1) + return -1; + + sem_init(&sem_clear_patch_region, 0, 0); + + act.sa_sigaction = leave_patch_region; + act.sa_flags = 0; + + if 
(sigaction(sig_clear_patch_region, &act, NULL) < 0) { + pr_dbg("failed to configure clear signal (SIGRT%d) handler\n", + sig_clear_patch_region); + return -1; + } + + pr_dbg("configured clear signal (SIGRT%d) handler\n", sig_clear_patch_region); + return 0; +} + /** * mcount_arch_dynamic_init - initialize arch-specific data structures to * perform runtime dynamic instrumentation @@ -262,6 +417,8 @@ int mcount_arch_dynamic_init() if (setup_synchronization_mechanism() < 0) return -1; + setup_clear_patch_region(); + return 0; } @@ -687,7 +844,22 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info synchronize_all_cores(); /* - * We fill the remaining part of the patching region with nops. + * The second step is to move any thread out of the critical zone if still + * present. Threads in the critical zone resume execution out of line, in + * their dedicated OLX region. + * + * The method used to move the threads is to signal all the threads, so they + * check if their instruction pointer is in the patching region. If so, they + * move their instruction pointer to the corresponding one in the OLX + * region. + */ + + if (register_patch_region(origin_code_addr, info->orig_size) == -1) + pr_dbg3("failed to register patch region\n"); + clear_patch_region(); + unregister_patch_region(origin_code_addr, info->orig_size); + + /* We fill the remaining part of the patching region with nops. 
* * 0x0: int3 * 0x1: diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index 0fbbedc4b..e7cbafd38 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -52,6 +52,7 @@ struct code_page { static LIST_HEAD(code_pages); +/* contains out-of-line execution code (return address -> modified instructions ptr) */ static struct Hashmap *code_hmap; /* minimum function size for dynamic update */ From 94fed36a279b157e4c296216f9b3917869fc1aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 4 Apr 2023 10:17:22 -0400 Subject: [PATCH 20/30] x86_64: dynamic: Comment patching process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comments for the last steps of the runtime patching sequence. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index c785dac55..f25ff6169 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -859,7 +859,10 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info clear_patch_region(); unregister_patch_region(origin_code_addr, info->orig_size); - /* We fill the remaining part of the patching region with nops. + /* The third step is to write the target address of the call. From the + * processor view the 4-bytes address can be any garbage instructions. + * + * We fill the remaining part of the patching region with nops. * * 0x0: int3 * 0x1: @@ -870,8 +873,11 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info memcpy(&((uint8_t *)origin_code_addr)[1], &trampoline_rel_addr, CALL_INSN_SIZE - 1); memset(origin_code_addr + CALL_INSN_SIZE, 0x90, /* NOP */ info->orig_size - CALL_INSN_SIZE); + /* FIXME Need to sync cores? Store membarrier? 
*/ /* + * The fourth and last step is to replace the trap with the call opcode. + * * 0x0: call * 0x5: * 0xb: From 9b29fab2cedd5ec19bfe1cbd15062ffb7ec1aa58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 08:52:33 -0400 Subject: [PATCH 21/30] dynamic: Batch function patching process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synchronization requires sending signals to threads inside the process. This is performed for every symbols, which means a lot of signal can be sent. This has a major performance impact. This commit batches the initial and final steps of the patching process so we only need to send one signal per thread for every batch. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 179 +++++++++++++++++++++++++++-------- libmcount/dynamic.c | 6 ++ 2 files changed, 147 insertions(+), 38 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index f25ff6169..5f609f3c6 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -327,10 +327,10 @@ static void leave_patch_region(int sig, siginfo_t *info, void *ucontext) } /** - * clear_patch_region - move threads that are in a patching region out of line + * clear_patch_regions - move threads that are in a patching region out of line * @return - 0 */ -static int clear_patch_region() +static int clear_patch_regions() { int signal_count; int move_count = 0; @@ -398,6 +398,32 @@ int setup_clear_patch_region() return 0; } +/* This list is used to store functions that need to be optimized or cleaned up + * later in the code. In both case, a SIGRTMIN+n must be send. By optimizing or + * cleaning all of them up at the same time, we only need to send one signal per + * thread. 
+ */ +LIST_HEAD(normal_funcs_patch); + +struct patch_dynamic_info { + struct list_head list; + struct mcount_dynamic_info *mdi; + struct mcount_disasm_info *info; +}; + +static void commit_normal_func(struct list_head *list, struct mcount_dynamic_info *mdi, + struct mcount_disasm_info *info) +{ + struct patch_dynamic_info *pdi; + pdi = xmalloc(sizeof(*pdi)); + + pdi->mdi = mdi; + pdi->info = info; + INIT_LIST_HEAD(&pdi->list); + + list_add(&pdi->list, list); +} + /** * mcount_arch_dynamic_init - initialize arch-specific data structures to * perform runtime dynamic instrumentation @@ -790,12 +816,14 @@ static int patch_xray_func(struct mcount_dynamic_info *mdi, struct uftrace_symbo } /** - * patch_code - inject a call to an instrumentation trampoline - * @mdi - dynamic info about the concerned module - * @info - disassembly info (instructions address and size) - * @return - instrumentation status + * patch_region_lock - insert a trap at the beginning of a patching region so no + * new incoming thread will execute it, and register the region as critical for + * next step + * @mdi - mcount dynamic info + * @info - disassembly info for the patched symbol + * @return - INSTRUMENT_SUCCESS */ -static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) +static int patch_region_lock(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) { /* * Let's assume we have the following instructions. @@ -814,14 +842,11 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info */ void *origin_code_addr; - void *trampoline_rel_addr; origin_code_addr = (void *)info->addr; if (info->has_intel_cet) origin_code_addr += ENDBR_INSN_SIZE; - trampoline_rel_addr = (void *)get_trampoline_offset(mdi, (unsigned long)origin_code_addr); - /* The first step is to insert a 1-byte trap-based probe point (atomic). * This will prevent threads to enter the critical zone while we patch it, * so no core will see partial modifications. 
@@ -832,19 +857,13 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info * 0xb: * * The trap will emulate a call to the trampoline while in place. - * - * We ensure that every core sees the trap before patching the critical - * zone, by synchronizing the them. */ if (register_trap(origin_code_addr, (void *)mdi->trampoline) == -1) return INSTRUMENT_FAILED; ((uint8_t *)origin_code_addr)[0] = 0xcc; - synchronize_all_cores(); - - /* - * The second step is to move any thread out of the critical zone if still + /* The second step is to move any thread out of the critical zone if still * present. Threads in the critical zone resume execution out of line, in * their dedicated OLX region. * @@ -856,8 +875,26 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info if (register_patch_region(origin_code_addr, info->orig_size) == -1) pr_dbg3("failed to register patch region\n"); - clear_patch_region(); - unregister_patch_region(origin_code_addr, info->orig_size); + + return INSTRUMENT_SUCCESS; +} + +/** + * patch_code - patch a region of the code with the operand of the call + * instruction used to probe a function. The call opcode needs to be inserted + * later. + * @mdi - mcount dynamic info for the current module + * @info - disassembly info for the patched symbol + */ +static void patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) +{ + void *trampoline_rel_addr; + void *origin_code_addr; + + origin_code_addr = (void *)info->addr; + if (info->has_intel_cet) + origin_code_addr += ENDBR_INSN_SIZE; + trampoline_rel_addr = (void *)get_trampoline_offset(mdi, (unsigned long)origin_code_addr); /* The third step is to write the target address of the call. From the * processor view the 4-bytes address can be any garbage instructions. 
@@ -873,7 +910,23 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info memcpy(&((uint8_t *)origin_code_addr)[1], &trampoline_rel_addr, CALL_INSN_SIZE - 1); memset(origin_code_addr + CALL_INSN_SIZE, 0x90, /* NOP */ info->orig_size - CALL_INSN_SIZE); - /* FIXME Need to sync cores? Store membarrier? */ +} + +/** + * patch_region_unlock - unmark a region as critical and remove the trap that + * prevents execution. + * @info - disassembly info for the patched symbol + */ +static void patch_region_unlock(struct mcount_disasm_info *info) +{ + void *origin_code_addr; + + origin_code_addr = (void *)info->addr; + if (info->has_intel_cet) + origin_code_addr += ENDBR_INSN_SIZE; + + if (unregister_patch_region(origin_code_addr, info->orig_size) == -1) + pr_dbg3("failed to unregister patch region\n"); /* * The fourth and last step is to replace the trap with the call opcode. @@ -884,13 +937,19 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info */ ((uint8_t *)origin_code_addr)[0] = 0xe8; - unregister_trap(origin_code_addr); - - return INSTRUMENT_SUCCESS; } -static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +/** + * patch_normal_func_init - perform the initial steps of the patching process, + * awaiting for sanitization of the critical region. This step is batched with + * subsequent ones. 
+ * @mdi - mcount dynamic info for the current module + * @sym - symbol to patch + * @disasm - disassembly engine + * @return - instrumentation status + */ +static int patch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, + struct mcount_disasm_engine *disasm) { uint8_t jmp_insn[15] = { 0x3e, @@ -898,20 +957,22 @@ static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_sym 0x25, }; uint64_t jmp_target; - struct mcount_disasm_info info = { - .sym = sym, - .addr = mdi->map->start + sym->addr, - }; + struct mcount_disasm_info *info; unsigned call_offset = CALL_INSN_SIZE; int state; - state = disasm_check_insns(disasm, mdi, &info); + info = xmalloc(sizeof(*info)); + memset(info, 0, sizeof(*info)); + info->sym = sym; + info->addr = mdi->map->start + sym->addr; + + state = disasm_check_insns(disasm, mdi, info); if (state != INSTRUMENT_SUCCESS) { pr_dbg3(" >> %s: %s\n", state == INSTRUMENT_FAILED ? "FAIL" : "SKIP", sym->name); return state; } - pr_dbg2("force patch normal func: %s (patch size: %d)\n", sym->name, info.orig_size); + pr_dbg2("force patch normal func: %s (patch size: %d)\n", sym->name, info->orig_size); /* * stored origin instruction block: @@ -923,20 +984,62 @@ static int patch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_sym * | [Return address] | * ---------------------- */ - jmp_target = info.addr + info.orig_size; - if (info.has_intel_cet) { + jmp_target = info->addr + info->orig_size; + if (info->has_intel_cet) { jmp_target += ENDBR_INSN_SIZE; call_offset += ENDBR_INSN_SIZE; } memcpy(jmp_insn + CET_JMP_INSN_SIZE, &jmp_target, sizeof(jmp_target)); - if (info.has_jump) - mcount_save_code(&info, call_offset, jmp_insn, 0); + if (info->has_jump) + mcount_save_code(info, call_offset, jmp_insn, 0); else - mcount_save_code(&info, call_offset, jmp_insn, sizeof(jmp_insn)); + mcount_save_code(info, call_offset, jmp_insn, sizeof(jmp_insn)); - return patch_code(mdi, &info); + state = 
patch_region_lock(mdi, info); + commit_normal_func(&normal_funcs_patch, mdi, info); + + return state; +} + +/** + * mcount_patch_normal_func_fini - perform the final step of the patching process and cleanup + * @return - 0 + */ +void mcount_patch_normal_func_fini() +{ + struct patch_dynamic_info *pdi, *tmp; + + if (list_empty(&normal_funcs_patch)) + return; + + /* We ensure that every core sees the trap before patching the critical + * zone, by synchronizing the them. + */ + synchronize_all_cores(); + clear_patch_regions(); + + list_for_each_entry_safe(pdi, tmp, &normal_funcs_patch, list) { + patch_code(pdi->mdi, pdi->info); + write_memory_barrier(); + patch_region_unlock(pdi->info); + } + + synchronize_all_cores(); + + list_for_each_entry_safe(pdi, tmp, &normal_funcs_patch, list) { + void *origin_code_addr; + + origin_code_addr = (void *)pdi->info->addr; + if (pdi->info->has_intel_cet) + origin_code_addr += ENDBR_INSN_SIZE; + + unregister_trap(origin_code_addr); + list_del(&pdi->list); + free(pdi->info); + free(pdi); + } } static int unpatch_func(uint8_t *insn, char *name) @@ -1025,7 +1128,7 @@ int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sy break; case DYNAMIC_NONE: - result = patch_normal_func(mdi, sym, disasm); + result = patch_normal_func_init(mdi, sym, disasm); break; default: diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index e7cbafd38..be8d838fd 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -227,6 +227,10 @@ __weak int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_sym return -1; } +__weak void mcount_patch_normal_func_fini() +{ +} + __weak int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, struct mcount_disasm_engine *disasm) { @@ -620,6 +624,8 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf if (!found) stats.nomatch++; + mcount_patch_normal_func_fini(); + free(soname); } From 
e0764602eb208dd41c5b5763939ee5937fc3f9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 12:27:34 -0400 Subject: [PATCH 22/30] dynamic: x86_64: Factor out 'check_endbr64' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if instruction at a given address is ENDBR64. Signed-off-by: Clément Guidi --- arch/x86_64/mcount-insn.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86_64/mcount-insn.c b/arch/x86_64/mcount-insn.c index 5e0e1bfa0..2725c0eb5 100644 --- a/arch/x86_64/mcount-insn.c +++ b/arch/x86_64/mcount-insn.c @@ -571,13 +571,24 @@ static bool check_unsupported(struct mcount_disasm_engine *disasm, cs_insn *insn return true; } +/** + * check_endbr64 - check if instruction at @addr is endbr64 + * @addr - address to check for endbr64 + * @return - 1 if found, 0 if not + */ +int check_endbr64(unsigned long addr) +{ + uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; + + return !memcmp((void *)addr, endbr64, sizeof(endbr64)); +} + int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynamic_info *mdi, struct mcount_disasm_info *info) { int status; cs_insn *insn = NULL; uint32_t count, i, size; - uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; void *trampoline_addr; uint8_t *operand; struct dynamic_bad_symbol *badsym; @@ -600,9 +611,9 @@ int disasm_check_insns(struct mcount_disasm_engine *disasm, struct mcount_dynami return INSTRUMENT_SKIPPED; size = info->sym->size; - if (!memcmp((void *)info->addr, endbr64, sizeof(endbr64))) { - addr += sizeof(endbr64); - size -= sizeof(endbr64); + if (check_endbr64(info->addr)) { + addr += ENDBR_INSN_SIZE; + size -= ENDBR_INSN_SIZE; if (size <= CALL_INSN_SIZE) return INSTRUMENT_SKIPPED; From 77d4a6ecfd939553612209ef2730af102bed5379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Fri, 10 Mar 2023 11:00:43 -0500 Subject: [PATCH 23/30] dynamic: x86_64: Full-dynamic 
runtime unpatching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit add for dynamically unpatching functions for the x86_64 architecture. The process is executed concurrently by replacing using a trap to resolve race conditions between thread (similar to how optimized kprobes are done). Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 111 ++++++++++++++++++++++++++++++++++- libmcount/dynamic.c | 5 +- 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 5f609f3c6..901846667 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -1042,6 +1042,110 @@ void mcount_patch_normal_func_fini() } } +extern int check_endbr64(unsigned long addr); + +/** + * unpatch_normal_func - remove full-dynamic instrumentation from a function + * entry and restore original instructions + * @mdi - mcount dynamic info for the current module + * @sym - symbol to patch + * @return - instrumentation status + */ +static int unpatch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) +{ + /* + * Let assume that we have the following instructions. + * + * 0x0: call + * 0x5: + * 0xb: + * + * The goal is to modify the instructions in order to get the following + * instructions. 
+ * + * 0x0: push %rbp + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + */ + + unsigned long origin_code_addr; + unsigned call_offset = CALL_INSN_SIZE; + struct mcount_orig_insn *orig; + uint8_t *origin_code; + int orig_size; + + origin_code_addr = mdi->map->start + sym->addr; + if (check_endbr64(origin_code_addr)) + origin_code_addr += ENDBR_INSN_SIZE; + orig = mcount_find_insn(origin_code_addr + call_offset); + if (!orig) { + pr_dbg("cannot find original instructions for '%s' at %#lx\n", + origin_code_addr + call_offset, sym->name); + return -1; + } + origin_code = orig->orig; + orig_size = orig->orig_size; + + pr_dbg2("unpatch normal func: %s (patch size: %d)\n", sym->name, orig_size); + + /* + * The first step is to insert a trap. + * + * 0x0: int3 + * 0x1: + * 0x5: + * 0xb: + */ + + ((uint8_t *)origin_code_addr)[0] = 0xcc; + + /* + * The second step is to restore the bytes after the trap instruction. + * + * 0x0: int3 + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + * + * Before restoring the last byte, a serialization instruction must be + * executed in order to synchronize the instruction cache of each processor. + * The easiest method is to execute a membarrier system call with + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. It will send a + * inter-processor interrupt that will execute the required serialization. + */ + + synchronize_all_cores(); + + if (!memcpy(&((uint8_t *)origin_code_addr)[1], &origin_code[1], orig_size - 1)) + return -1; + + /* + * The third step is to restore the last byte. + * + * 0x0: push %rbp + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + */ + + ((uint8_t *)origin_code_addr)[0] = origin_code[0]; + + /* + * The fourth and last step is to move all thread that are currently + * executing in the modified instructions to the original instructions at + * the end of the trampoline. This is needed in order to free the memory + * allocated for the trampoline without any race condition. 
+ * + * The method used to move the threads is to send the SIGRTMIN+n signal to + * all other threads. When their thread handler executes, it will check if + * the next instruction pointer is in the patching region. If it is, will + * move the next instruction pointer to the equivalent modified instruction. + */ + + return INSTRUMENT_SUCCESS; +} + static int unpatch_func(uint8_t *insn, char *name) { uint8_t nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; @@ -1137,8 +1241,7 @@ int mcount_patch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sy return result; } -int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) { int result = INSTRUMENT_SKIPPED; @@ -1151,6 +1254,10 @@ int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol * result = unpatch_mcount_func(mdi, sym); break; + case DYNAMIC_NONE: + result = unpatch_normal_func(mdi, sym); + break; + default: break; } diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index be8d838fd..b1e276a36 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -231,8 +231,7 @@ __weak void mcount_patch_normal_func_fini() { } -__weak int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym, - struct mcount_disasm_engine *disasm) +__weak int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) { return -1; } @@ -618,7 +617,7 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf else if (match == 1) mcount_patch_func_with_stats(mdi, sym); else - mcount_unpatch_func(mdi, sym, NULL); + mcount_unpatch_func(mdi, sym); } if (!found) From c97f295984fbeb09e34244e06b84815666df9e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 08:52:33 -0400 Subject: [PATCH 24/30] dynamic: Batch function unpatching process MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synchronization requires sending signals to threads inside the process. This is performed for every symbols, which means a lot of signal can be sent. This has a major performance impact. This commit batches the initial and final steps of the patching process so we only need to send one signal per thread for every batch. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 125 ++++++++++++++++++++++++----------- libmcount/dynamic.c | 5 ++ 2 files changed, 91 insertions(+), 39 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 901846667..13e57ddc1 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -398,12 +398,13 @@ int setup_clear_patch_region() return 0; } -/* This list is used to store functions that need to be optimized or cleaned up - * later in the code. In both case, a SIGRTMIN+n must be send. By optimizing or - * cleaning all of them up at the same time, we only need to send one signal per - * thread. +/* These two lists are used to store functions that need to be optimized or + * cleaned up later in the code. In both case, a SIGRTMIN+n must be send. By + * optimizing or cleaning all of them up at the same time, we only need to send + * one signal per thread. 
*/ LIST_HEAD(normal_funcs_patch); +LIST_HEAD(normal_funcs_unpatch); struct patch_dynamic_info { struct list_head list; @@ -935,7 +936,6 @@ static void patch_region_unlock(struct mcount_disasm_info *info) * 0x5: * 0xb: */ - ((uint8_t *)origin_code_addr)[0] = 0xe8; } @@ -1045,13 +1045,45 @@ void mcount_patch_normal_func_fini() extern int check_endbr64(unsigned long addr); /** - * unpatch_normal_func - remove full-dynamic instrumentation from a function - * entry and restore original instructions + * unpatch_code - restore original instructions except the trap at the beginning + * @info - mcount disassembly info + * @return - -1 on error, 0 on success + */ +int unpatch_code(struct mcount_disasm_info *info) +{ + uint8_t *origin_code = info->insns; + uint8_t *origin_code_addr = (uint8_t *)info->addr; + int orig_size = info->orig_size; + + /* + * The second step is to restore the bytes after the trap instruction. + * + * 0x0: int3 + * 0x1: mov %rsp,%rbp + * 0x4: lea 0xeb0(%rip),%rdi + * 0xb: + * + * Before restoring the last byte, a serialization instruction must be + * executed in order to synchronize the instruction cache of each processor. + * The easiest method is to execute a membarrier system call with + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. It will send a + * inter-processor interrupt that will execute the required serialization. + */ + + memcpy(&origin_code_addr[1], &origin_code[1], orig_size - 1); + + return 0; +} + +/** + * unpatch_normal_func_init - perform the initial steps of the unpatching + * process, awaiting for core synchronization. This step is batched with + * subsequent ones. * @mdi - mcount dynamic info for the current module * @sym - symbol to patch * @return - instrumentation status */ -static int unpatch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) +static int unpatch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) { /* * Let assume that we have the following instructions. 
@@ -1074,6 +1106,7 @@ static int unpatch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_s struct mcount_orig_insn *orig; uint8_t *origin_code; int orig_size; + struct mcount_disasm_info *info; origin_code_addr = mdi->map->start + sym->addr; if (check_endbr64(origin_code_addr)) @@ -1098,27 +1131,30 @@ static int unpatch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_s * 0xb: */ + register_trap((void *)origin_code_addr, (void *)mdi->trampoline); ((uint8_t *)origin_code_addr)[0] = 0xcc; - /* - * The second step is to restore the bytes after the trap instruction. - * - * 0x0: int3 - * 0x1: mov %rsp,%rbp - * 0x4: lea 0xeb0(%rip),%rdi - * 0xb: - * - * Before restoring the last byte, a serialization instruction must be - * executed in order to synchronize the instruction cache of each processor. - * The easiest method is to execute a membarrier system call with - * MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. It will send a - * inter-processor interrupt that will execute the required serialization. - */ + info = xmalloc(sizeof(*info)); + memset(info, 0, sizeof(*info)); + info->sym = sym; + info->addr = mdi->map->start + sym->addr; + ASSERT(orig_size < 64); /* fit into info->insns */ + memcpy(info->insns, origin_code, orig_size); + info->orig_size = orig_size; + commit_normal_func(&normal_funcs_unpatch, mdi, info); - synchronize_all_cores(); + return 0; +} - if (!memcpy(&((uint8_t *)origin_code_addr)[1], &origin_code[1], orig_size - 1)) - return -1; +/** + * unpatch_region_unlock - remove the trap at the start of the unpatched region + * @info - mcount disassembly info + * @return - 0 + */ +int unpatch_region_unlock(struct mcount_disasm_info *info) +{ + uint8_t *origin_code = info->insns; + uint8_t *origin_code_addr = (uint8_t *)info->addr; /* * The third step is to restore the last byte. 
@@ -1129,21 +1165,32 @@ static int unpatch_normal_func(struct mcount_dynamic_info *mdi, struct uftrace_s * 0xb: */ - ((uint8_t *)origin_code_addr)[0] = origin_code[0]; + origin_code_addr[0] = origin_code[0]; - /* - * The fourth and last step is to move all thread that are currently - * executing in the modified instructions to the original instructions at - * the end of the trampoline. This is needed in order to free the memory - * allocated for the trampoline without any race condition. - * - * The method used to move the threads is to send the SIGRTMIN+n signal to - * all other threads. When their thread handler executes, it will check if - * the next instruction pointer is in the patching region. If it is, will - * move the next instruction pointer to the equivalent modified instruction. - */ + return 0; +} - return INSTRUMENT_SUCCESS; +/** + * mcount_unpatch_normal_func_fini - perform the final steps of the unpatching + * process and cleanup + */ +void mcount_unpatch_normal_func_fini() +{ + struct patch_dynamic_info *pdi, *tmp; + + if (list_empty(&normal_funcs_unpatch)) + return; + + synchronize_all_cores(); + + list_for_each_entry_safe(pdi, tmp, &normal_funcs_unpatch, list) { + unpatch_code(pdi->info); + write_memory_barrier(); + unpatch_region_unlock(pdi->info); + list_del(&pdi->list); + free(pdi->info); + free(pdi); + } } static int unpatch_func(uint8_t *insn, char *name) @@ -1255,7 +1302,7 @@ int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol * break; case DYNAMIC_NONE: - result = unpatch_normal_func(mdi, sym); + result = unpatch_normal_func_init(mdi, sym); break; default: diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index b1e276a36..b6ae17d3e 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -231,6 +231,10 @@ __weak void mcount_patch_normal_func_fini() { } +__weak void mcount_unpatch_normal_func_fini() +{ +} + __weak int mcount_unpatch_func(struct mcount_dynamic_info *mdi, struct uftrace_symbol *sym) { 
return -1; @@ -624,6 +628,7 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf stats.nomatch++; mcount_patch_normal_func_fini(); + mcount_unpatch_normal_func_fini(); free(soname); } From 05a4a04e9266831cf1b6a1709d1e834ba83d0363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Wed, 12 Apr 2023 09:16:40 -0400 Subject: [PATCH 25/30] mcount: Store filter settings as global variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a global definition of the filter settings so the agent can modify them easily. Signed-off-by: Clément Guidi --- libmcount/mcount.c | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/libmcount/mcount.c b/libmcount/mcount.c index 4717a8208..0042c4370 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -76,6 +76,13 @@ int page_size_in_kb; /* call depth to filter */ static int __maybe_unused mcount_depth = MCOUNT_DEFAULT_DEPTH; +/* setting for all filter actions */ +static struct uftrace_filter_setting mcount_filter_setting = { + .ptype = PATT_REGEX, + .auto_args = false, + .allow_kernel = false, +}; + /* boolean flag to turn on/off recording */ static bool __maybe_unused mcount_enabled = true; @@ -114,7 +121,7 @@ __weak void dynamic_return(void) #ifdef DISABLE_MCOUNT_FILTER -static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) +static void mcount_filter_init(struct uftrace_filter_setting *filter_setting, bool force) { if (getenv("UFTRACE_SRCLINE") == NULL) return; @@ -122,7 +129,7 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) load_module_symtabs(&mcount_sym_info); /* use debug info if available */ - prepare_debug_info(&mcount_sym_info, ptype, NULL, NULL, false, force); + prepare_debug_info(&mcount_sym_info, filter_setting->ptype, NULL, NULL, false, force); save_debug_info(&mcount_sym_info, mcount_sym_info.dirname); } @@ 
-366,7 +373,7 @@ static void mcount_signal_finish(void) } } -static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) +static void mcount_filter_init(struct uftrace_filter_setting *filter_setting, bool force) { char *filter_str = getenv("UFTRACE_FILTER"); char *trigger_str = getenv("UFTRACE_TRIGGER"); @@ -376,23 +383,19 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) char *caller_str = getenv("UFTRACE_CALLER"); char *loc_str = getenv("UFTRACE_LOCATION"); - struct uftrace_filter_setting filter_setting = { - .ptype = ptype, - .auto_args = false, - .allow_kernel = false, - .lp64 = host_is_lp64(), - .arch = host_cpu_arch(), - }; + filter_setting->lp64 = host_is_lp64(); + filter_setting->arch = host_cpu_arch(); + bool needs_debug_info = false; load_module_symtabs(&mcount_sym_info); - mcount_signal_init(getenv("UFTRACE_SIGNAL"), &filter_setting); + mcount_signal_init(getenv("UFTRACE_SIGNAL"), filter_setting); /* setup auto-args only if argument/return value is used */ if (argument_str || retval_str || autoargs_str || (trigger_str && (strstr(trigger_str, "arg") || strstr(trigger_str, "retval")))) { - setup_auto_args(&filter_setting); + setup_auto_args(filter_setting); needs_debug_info = true; } @@ -401,28 +404,28 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) /* use debug info if available */ if (needs_debug_info) { - prepare_debug_info(&mcount_sym_info, ptype, argument_str, retval_str, - !!autoargs_str, force); + prepare_debug_info(&mcount_sym_info, filter_setting->ptype, argument_str, + retval_str, !!autoargs_str, force); save_debug_info(&mcount_sym_info, mcount_sym_info.dirname); } mcount_triggers = xmalloc(sizeof(*mcount_triggers)); *mcount_triggers = RB_ROOT; uftrace_setup_filter(filter_str, &mcount_sym_info, mcount_triggers, &mcount_filter_mode, - &filter_setting); + filter_setting); uftrace_setup_trigger(trigger_str, &mcount_sym_info, mcount_triggers, &mcount_filter_mode, - 
&filter_setting); - uftrace_setup_argument(argument_str, &mcount_sym_info, mcount_triggers, &filter_setting); - uftrace_setup_retval(retval_str, &mcount_sym_info, mcount_triggers, &filter_setting); + filter_setting); + uftrace_setup_argument(argument_str, &mcount_sym_info, mcount_triggers, filter_setting); + uftrace_setup_retval(retval_str, &mcount_sym_info, mcount_triggers, filter_setting); if (needs_debug_info) { uftrace_setup_loc_filter(loc_str, &mcount_sym_info, mcount_triggers, - &mcount_loc_mode, &filter_setting); + &mcount_loc_mode, filter_setting); } if (caller_str) { uftrace_setup_caller_filter(caller_str, &mcount_sym_info, mcount_triggers, - &filter_setting); + filter_setting); } /* there might be caller triggers, count it separately */ @@ -433,13 +436,13 @@ static void mcount_filter_init(enum uftrace_pattern_type ptype, bool force) char *autoarg = "."; char *autoret = "."; - if (ptype == PATT_GLOB) + if (filter_setting->ptype == PATT_GLOB) autoarg = autoret = "*"; - filter_setting.auto_args = true; + filter_setting->auto_args = true; - uftrace_setup_argument(autoarg, &mcount_sym_info, mcount_triggers, &filter_setting); - uftrace_setup_retval(autoret, &mcount_sym_info, mcount_triggers, &filter_setting); + uftrace_setup_argument(autoarg, &mcount_sym_info, mcount_triggers, filter_setting); + uftrace_setup_retval(autoret, &mcount_sym_info, mcount_triggers, filter_setting); } if (getenv("UFTRACE_DEPTH")) @@ -2060,7 +2063,6 @@ static __used void mcount_startup(void) char *symdir_str; struct stat statbuf; bool nest_libcall; - enum uftrace_pattern_type patt_type = PATT_REGEX; if (!(mcount_global_flags & MCOUNT_GFL_SETUP)) return; @@ -2161,14 +2163,14 @@ static __used void mcount_startup(void) record_proc_maps(dirname, mcount_session_name(), &mcount_sym_info); if (pattern_str) - patt_type = parse_filter_pattern(pattern_str); + mcount_filter_setting.ptype = parse_filter_pattern(pattern_str); if (patch_str) mcount_return_fn = (unsigned long)dynamic_return; else 
mcount_return_fn = (unsigned long)mcount_return; - mcount_filter_init(patt_type, !!patch_str); + mcount_filter_init(&mcount_filter_setting, !!patch_str); mcount_watch_init(); if (maxstack_str) @@ -2181,10 +2183,10 @@ static __used void mcount_startup(void) mcount_min_size = strtoul(minsize_str, NULL, 0); if (patch_str) - mcount_dynamic_update(&mcount_sym_info, patch_str, patt_type); + mcount_dynamic_update(&mcount_sym_info, patch_str, mcount_filter_setting.ptype); if (event_str) - mcount_setup_events(dirname, event_str, patt_type); + mcount_setup_events(dirname, event_str, mcount_filter_setting.ptype); if (getenv("UFTRACE_KERNEL_PID_UPDATE")) kernel_pid_update = true; @@ -2209,7 +2211,7 @@ static __used void mcount_startup(void) /* initialize script binding */ if (SCRIPT_ENABLED && script_str) - mcount_script_init(patt_type); + mcount_script_init(mcount_filter_setting.ptype); compiler_barrier(); pr_dbg("mcount setup done\n"); From 606e5d72ff3cde5382575de3936c3d2044314f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 13 Feb 2023 12:00:38 -0500 Subject: [PATCH 26/30] mcount: Set match pattern type at runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the pattern type in the agent so it can interpret subsequent options (e.g. filters, patch strings). 
Signed-off-by: Clément Guidi --- cmds/live.c | 7 ++++--- libmcount/mcount.c | 11 ++++++++++- uftrace.h | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cmds/live.c b/cmds/live.c index b641f7e2d..9f77c4e02 100644 --- a/cmds/live.c +++ b/cmds/live.c @@ -340,9 +340,10 @@ static int forward_options(struct uftrace_opts *opts) if (capabilities < 0) goto close; - /* FIXME Forward user options and set status */ - if (0) - status = forward_option(sfd, capabilities, 0, NULL, 0); + status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATTERN, &opts->patt_type, + sizeof(opts->patt_type)); + if (status < 0) + goto close; close: status_close = agent_message_send(sfd, UFTRACE_MSG_AGENT_CLOSE, NULL, 0); diff --git a/libmcount/mcount.c b/libmcount/mcount.c index 0042c4370..c02c19c1b 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -113,7 +113,7 @@ static pthread_t agent; /* state flag for the agent */ static volatile bool agent_run = false; -#define MCOUNT_AGENT_CAPABILITIES 0 +#define MCOUNT_AGENT_CAPABILITIES (UFTRACE_AGENT_OPT_PATTERN) __weak void dynamic_return(void) { @@ -1867,9 +1867,18 @@ static int agent_read_option(int fd, int *opt, void **value, size_t read_size) */ static int agent_apply_option(int opt, void *value, size_t size, struct rb_root *triggers) { + enum uftrace_pattern_type patt_type; int ret = 0; switch (opt) { + case UFTRACE_AGENT_OPT_PATTERN: + patt_type = *((int *)value); + if (patt_type != mcount_filter_setting.ptype) { + mcount_filter_setting.ptype = patt_type; + pr_dbg4("use pattern type %#x\n", patt_type); + } + break; + default: ret = -1; } diff --git a/uftrace.h b/uftrace.h index 2ee74fe01..7eaa4270a 100644 --- a/uftrace.h +++ b/uftrace.h @@ -458,7 +458,7 @@ struct uftrace_msg_dlopen { }; enum uftrace_agent_opt { - UFTRACE_AGENT_OPT_XXX, + UFTRACE_AGENT_OPT_PATTERN = (1U << 0), /* pattern match type */ }; extern struct uftrace_session *first_session; From c47c1177ada480ae1f58afc800b4415bd5a22310 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Tue, 7 Mar 2023 10:25:02 -0500 Subject: [PATCH 27/30] uftrace: Apply dynamic (un)patching at runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the '--patch' and '--unpatch' options in the client. When used, the agent patches or unpatches symbols on the fly. Co-authored-by: Gabriel-Andrew Pollo-Guilbert Signed-off-by: Clément Guidi --- cmds/live.c | 17 +++++++++++++---- libmcount/mcount.c | 7 ++++++- uftrace.h | 1 + 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmds/live.c b/cmds/live.c index 9f77c4e02..1b06550f9 100644 --- a/cmds/live.c +++ b/cmds/live.c @@ -340,10 +340,19 @@ static int forward_options(struct uftrace_opts *opts) if (capabilities < 0) goto close; - status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATTERN, &opts->patt_type, - sizeof(opts->patt_type)); - if (status < 0) - goto close; + if (opts->patch) { /* provide a pattern type for options that need it */ + status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATTERN, + &opts->patt_type, sizeof(opts->patt_type)); + if (status < 0) + goto close; + } + + if (opts->patch) { + status = forward_option(sfd, capabilities, UFTRACE_AGENT_OPT_PATCH, opts->patch, + strlen(opts->patch) + 1); + if (status < 0) + goto close; + } close: status_close = agent_message_send(sfd, UFTRACE_MSG_AGENT_CLOSE, NULL, 0); diff --git a/libmcount/mcount.c b/libmcount/mcount.c index c02c19c1b..ba0082d61 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -113,7 +113,7 @@ static pthread_t agent; /* state flag for the agent */ static volatile bool agent_run = false; -#define MCOUNT_AGENT_CAPABILITIES (UFTRACE_AGENT_OPT_PATTERN) +#define MCOUNT_AGENT_CAPABILITIES (UFTRACE_AGENT_OPT_PATTERN | UFTRACE_AGENT_OPT_PATCH) __weak void dynamic_return(void) { @@ -1879,6 +1879,11 @@ static int agent_apply_option(int opt, void *value, size_t size, struct rb_root } break; + case 
UFTRACE_AGENT_OPT_PATCH: + pr_dbg4("apply patch '%s' (size=%d)\n", value, size); + mcount_dynamic_update(&mcount_sym_info, value, mcount_filter_setting.ptype); + break; + default: ret = -1; } diff --git a/uftrace.h b/uftrace.h index 7eaa4270a..c1fbcc67d 100644 --- a/uftrace.h +++ b/uftrace.h @@ -459,6 +459,7 @@ struct uftrace_msg_dlopen { enum uftrace_agent_opt { UFTRACE_AGENT_OPT_PATTERN = (1U << 0), /* pattern match type */ + UFTRACE_AGENT_OPT_PATCH = (1U << 1), /* patch string */ }; extern struct uftrace_session *first_session; From 9c8977a75362d2fda7fde4a8047892259ad0809b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 09:07:53 -0400 Subject: [PATCH 28/30] mcount: Use flag to indicate if target is running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a global flag to indicate the state of the target. When the target is not running, tasks such as dynamic patching can be performed with less constraints. If libmcount.so is dynamically injected (not implemented yet), the 'mcount_target_running' flag indicates that libmcount has to be initialized in a running target. 
Signed-off-by: Clément Guidi --- libmcount/mcount.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libmcount/mcount.c b/libmcount/mcount.c index ba0082d61..c185592a5 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -52,6 +52,9 @@ bool mcount_auto_recover = ARCH_SUPPORT_AUTO_RECOVER; /* global flag to control mcount behavior */ unsigned long mcount_global_flags = MCOUNT_GFL_SETUP; +/* global to indicate if the target is executing */ +bool mcount_target_running = false; + /* TSD key to save mtd below */ pthread_key_t mtd_key = (pthread_key_t)-1; @@ -2232,6 +2235,7 @@ static __used void mcount_startup(void) mcount_global_flags &= ~MCOUNT_GFL_SETUP; mtd.recursion_marker = false; + mcount_target_running = true; } static void mcount_cleanup(void) From 4f8b53196cc8cfa170472b2828c6afca3901d5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Mon, 10 Apr 2023 09:11:55 -0400 Subject: [PATCH 29/30] dynamic: x86_64: Use soft patching in cold targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When patching a binary before its execution, we can skip the serialization and critical zone exclusion steps. These steps are only useful when cross-modification occurs, at runtime. Signed-off-by: Clément Guidi --- arch/x86_64/mcount-dynamic.c | 60 ++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/arch/x86_64/mcount-dynamic.c b/arch/x86_64/mcount-dynamic.c index 13e57ddc1..a831c6eb1 100644 --- a/arch/x86_64/mcount-dynamic.c +++ b/arch/x86_64/mcount-dynamic.c @@ -28,6 +28,8 @@ static const unsigned char patchable_clang_nop[] = { 0x0f, 0x1f, 0x44, 0x00, 0x0 static const unsigned char endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa }; +extern bool mcount_target_running; + /* * Hashmap of trampoline locations for every installed int3 trap. 
The traps are * used when patching and unpatching functions: they prevent threads from @@ -997,8 +999,14 @@ static int patch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftrac else mcount_save_code(info, call_offset, jmp_insn, sizeof(jmp_insn)); - state = patch_region_lock(mdi, info); - commit_normal_func(&normal_funcs_patch, mdi, info); + if (mcount_target_running) { + state = patch_region_lock(mdi, info); + commit_normal_func(&normal_funcs_patch, mdi, info); + } + else { + patch_code(mdi, info); + patch_region_unlock(info); + } return state; } @@ -1011,6 +1019,9 @@ void mcount_patch_normal_func_fini() { struct patch_dynamic_info *pdi, *tmp; + if (!mcount_target_running) + return; + if (list_empty(&normal_funcs_patch)) return; @@ -1122,26 +1133,32 @@ static int unpatch_normal_func_init(struct mcount_dynamic_info *mdi, struct uftr pr_dbg2("unpatch normal func: %s (patch size: %d)\n", sym->name, orig_size); - /* - * The first step is to insert a trap. - * - * 0x0: int3 - * 0x1: - * 0x5: - * 0xb: - */ + if (mcount_target_running) { + info = xmalloc(sizeof(*info)); + memset(info, 0, sizeof(*info)); + info->sym = sym; + info->addr = mdi->map->start + sym->addr; + ASSERT(orig_size < 64); /* fit into info->insns */ + memcpy(info->insns, origin_code, orig_size); + info->orig_size = orig_size; + + /* The first step is to insert a trap. 
+ * + * 0x0: int3 + * 0x1: + * 0x5: + * 0xb: + */ - register_trap((void *)origin_code_addr, (void *)mdi->trampoline); - ((uint8_t *)origin_code_addr)[0] = 0xcc; + register_trap((void *)origin_code_addr, (void *)mdi->trampoline); + ((uint8_t *)origin_code_addr)[0] = 0xcc; - info = xmalloc(sizeof(*info)); - memset(info, 0, sizeof(*info)); - info->sym = sym; - info->addr = mdi->map->start + sym->addr; - ASSERT(orig_size < 64); /* fit into info->insns */ - memcpy(info->insns, origin_code, orig_size); - info->orig_size = orig_size; - commit_normal_func(&normal_funcs_unpatch, mdi, info); + commit_normal_func(&normal_funcs_unpatch, mdi, info); + } + else { + if (!memcpy((uint8_t *)origin_code_addr, origin_code, orig_size)) + return INSTRUMENT_FAILED; + } return 0; } @@ -1178,6 +1195,9 @@ void mcount_unpatch_normal_func_fini() { struct patch_dynamic_info *pdi, *tmp; + if (!mcount_target_running) + return; + if (list_empty(&normal_funcs_unpatch)) return; From d146f900bd0df8de6286e0d0bb61960c7a5e0f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Guidi?= Date: Wed, 3 May 2023 13:49:03 -0400 Subject: [PATCH 30/30] dynamic: Collect and display dynamic stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print dynamic stats for patching and unpatching. 
Signed-off-by: Clément Guidi --- libmcount/dynamic.c | 86 ++++++++++++++++++++++++++++++++------------- libmcount/dynamic.h | 6 ++++ 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/libmcount/dynamic.c b/libmcount/dynamic.c index b6ae17d3e..88e39bb9e 100644 --- a/libmcount/dynamic.c +++ b/libmcount/dynamic.c @@ -32,13 +32,8 @@ #include "utils/utils.h" static struct mcount_dynamic_info *mdinfo; -static struct mcount_dynamic_stats { - int total; - int failed; - int skipped; - int nomatch; - int unpatch; -} stats; +static struct mcount_dynamic_stats patch_stats, unpatch_stats; +static int stats_nomatch; #define PAGE_SIZE 4096 #define CODE_CHUNK (PAGE_SIZE * 8) @@ -545,16 +540,31 @@ static void mcount_patch_func_with_stats(struct mcount_dynamic_info *mdi, { switch (mcount_patch_func(mdi, sym, &disasm, min_size)) { case INSTRUMENT_FAILED: - stats.failed++; + patch_stats.failed++; break; case INSTRUMENT_SKIPPED: - stats.skipped++; + patch_stats.skipped++; break; case INSTRUMENT_SUCCESS: - default: + patch_stats.success++; + break; + } +} + +static void mcount_unpatch_func_with_stats(struct mcount_dynamic_info *mdi, + struct uftrace_symbol *sym) +{ + switch (mcount_unpatch_func(mdi, sym)) { + case INSTRUMENT_FAILED: + unpatch_stats.failed++; + break; + case INSTRUMENT_SKIPPED: + unpatch_stats.skipped++; + break; + case INSTRUMENT_SUCCESS: + unpatch_stats.success++; break; } - stats.total++; } static void patch_patchable_func_matched(struct mcount_dynamic_info *mdi, struct uftrace_mmap *map) @@ -621,11 +631,11 @@ static void patch_normal_func_matched(struct mcount_dynamic_info *mdi, struct uf else if (match == 1) mcount_patch_func_with_stats(mdi, sym); else - mcount_unpatch_func(mdi, sym); + mcount_unpatch_func_with_stats(mdi, sym); } if (!found) - stats.nomatch++; + stats_nomatch++; mcount_patch_normal_func_fini(); mcount_unpatch_normal_func_fini(); @@ -668,6 +678,9 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, def_mod = 
basename(sinfo->exec_map->libname); parse_pattern_list(patch_funcs, def_mod, ptype); + memset(&patch_stats, 0, sizeof(patch_stats)); + memset(&unpatch_stats, 0, sizeof(unpatch_stats)); + /* TODO: filter out unsupported libs */ for (mdi = mdinfo; mdi != NULL; mdi = mdi->next) { if (mdi->type == DYNAMIC_NONE && !mcount_dynamic_is_initialized) { @@ -684,8 +697,15 @@ static int do_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, release_pattern_list(); - if (stats.failed + stats.skipped + stats.nomatch == 0) { - pr_dbg("patched all (%d) functions in '%s'\n", stats.total, + if (patch_stats.success && patch_stats.failed + patch_stats.skipped == 0) { + int patch_total = patch_stats.success + patch_stats.failed + patch_stats.skipped; + pr_dbg("patched all (%d) functions in '%s'\n", patch_total, + basename(sinfo->filename)); + } + if (unpatch_stats.success && unpatch_stats.failed + unpatch_stats.skipped == 0) { + int unpatch_total = + unpatch_stats.success + unpatch_stats.failed + unpatch_stats.skipped; + pr_dbg("unpatched all (%d) functions in '%s'\n", unpatch_total, basename(sinfo->filename)); } @@ -721,21 +741,37 @@ int mcount_dynamic_update(struct uftrace_sym_info *sinfo, char *patch_funcs, ret = do_dynamic_update(sinfo, patch_funcs, ptype); - if (stats.total && (stats.failed || stats.skipped)) { - int success = stats.total - stats.failed - stats.skipped; + if (patch_stats.failed || patch_stats.skipped) { + int total = patch_stats.success + patch_stats.failed + patch_stats.skipped; int r, q; pr_dbg("dynamic patch stats for '%s'\n", basename(sinfo->filename)); - pr_dbg(" total: %8d\n", stats.total); - q = calc_percent(success, stats.total, &r); - pr_dbg(" patched: %8d (%2d.%02d%%)\n", success, q, r); - q = calc_percent(stats.failed, stats.total, &r); - pr_dbg(" failed: %8d (%2d.%02d%%)\n", stats.failed, q, r); - q = calc_percent(stats.skipped, stats.total, &r); - pr_dbg(" skipped: %8d (%2d.%02d%%)\n", stats.skipped, q, r); - pr_dbg("no match: %8d\n", 
stats.nomatch); + pr_dbg(" total: %8d\n", total); + q = calc_percent(patch_stats.success, total, &r); + pr_dbg(" patched: %8d (%2d.%02d%%)\n", patch_stats.success, q, r); + q = calc_percent(patch_stats.failed, total, &r); + pr_dbg(" failed: %8d (%2d.%02d%%)\n", patch_stats.failed, q, r); + q = calc_percent(patch_stats.skipped, total, &r); + pr_dbg(" skipped: %8d (%2d.%02d%%)\n", patch_stats.skipped, q, r); } + if (unpatch_stats.failed || unpatch_stats.skipped) { + int total = unpatch_stats.success + unpatch_stats.failed + unpatch_stats.skipped; + int r, q; + + pr_dbg("dynamic unpatch stats for '%s'\n", basename(sinfo->filename)); + pr_dbg(" total: %8d\n", total); + q = calc_percent(unpatch_stats.success, total, &r); + pr_dbg(" unpatched: %8d (%2d.%02d%%)\n", unpatch_stats.success, q, r); + q = calc_percent(unpatch_stats.failed, total, &r); + pr_dbg(" failed: %8d (%2d.%02d%%)\n", unpatch_stats.failed, q, r); + q = calc_percent(unpatch_stats.skipped, total, &r); + pr_dbg(" skipped: %8d (%2d.%02d%%)\n", unpatch_stats.skipped, q, r); + } + + if (stats_nomatch) + pr_dbg("no match: %8d\n", stats_nomatch); + mcount_freeze_code(); return ret; } diff --git a/libmcount/dynamic.h b/libmcount/dynamic.h index c687e5e37..21055fc43 100644 --- a/libmcount/dynamic.h +++ b/libmcount/dynamic.h @@ -68,6 +68,12 @@ struct mcount_disasm_engine { #define INSTRUMENT_FAILED -1 #define INSTRUMENT_SKIPPED -2 +struct mcount_dynamic_stats { + int success; + int failed; + int skipped; +}; + /* * Supposing the size of smallest conditional branch is 2 byte. * We can replace, at most, 3 of them by the instrumentation