From 3eb98a13183703c36cc481189e1b362d7077a9b4 Mon Sep 17 00:00:00 2001 From: Kuba Mracek Date: Fri, 2 Dec 2016 21:27:14 +0000 Subject: [PATCH] [sanitizer] Track architecture and UUID of modules in LoadedModule When we enumerate loaded modules, we only track the module name and base address, which then has several problems on macOS. Dylibs and executables often have several architecture slices and not storing which architecture/UUID is actually loaded creates problems with symbolication: A file path + offset isn't enough to correctly symbolicate, since the offset can be valid in multiple slices. This is especially common for Haswell+ X86_64 machines, where x86_64h slices are preferred, but if one is not available, a regular x86_64 is loaded instead. But the same issue exists for i386 vs. x86_64 as well. This patch adds tracking of arch and UUID for each LoadedModule. At this point, this information isn't used in reports, but this is the first step. The goal is to correctly identify which slice is loaded in symbolication, and also to output this information in reports so that we can tell which exact slices were loaded in post-mortem analysis. 
Differential Revision: https://reviews.llvm.org/D26632 llvm-svn: 288537 --- .../lib/sanitizer_common/sanitizer_common.cc | 9 +++ .../lib/sanitizer_common/sanitizer_common.h | 26 ++++++- .../lib/sanitizer_common/sanitizer_procmaps.h | 13 ++-- .../sanitizer_procmaps_freebsd.cc | 4 +- .../sanitizer_procmaps_linux.cc | 4 +- .../sanitizer_procmaps_mac.cc | 77 ++++++++++++++++--- .../tests/sanitizer_procmaps_test.cc | 21 +++++ 7 files changed, 136 insertions(+), 18 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.cc b/compiler-rt/lib/sanitizer_common/sanitizer_common.cc index 554c0e3c5f0f0..1c6fc3ef86a3e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.cc @@ -259,9 +259,18 @@ void LoadedModule::set(const char *module_name, uptr base_address) { base_address_ = base_address; } +void LoadedModule::set(const char *module_name, uptr base_address, + ModuleArch arch, u8 uuid[kModuleUUIDSize]) { + set(module_name, base_address); + arch_ = arch; + internal_memcpy(uuid_, uuid, sizeof(uuid_)); +} + void LoadedModule::clear() { InternalFree(full_name_); full_name_ = nullptr; + arch_ = kModuleArchUnknown; + internal_memset(uuid_, 0, kModuleUUIDSize); while (!ranges_.empty()) { AddressRange *r = ranges_.front(); ranges_.pop_front(); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index bf66d00d6ae3a..57ed35ba4165c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -646,18 +646,40 @@ uptr InternalLowerBound(const Container &v, uptr first, uptr last, return first; } +enum ModuleArch { + kModuleArchUnknown, + kModuleArchI386, + kModuleArchX86_64, + kModuleArchX86_64H, + kModuleArchARMV6, + kModuleArchARMV7, + kModuleArchARMV7S, + kModuleArchARMV7K, + kModuleArchARM64 +}; + +const uptr kModuleUUIDSize = 16; + // Represents a binary loaded into 
virtual memory (e.g. this can be an // executable or a shared object). class LoadedModule { public: - LoadedModule() : full_name_(nullptr), base_address_(0) { ranges_.clear(); } + LoadedModule() + : full_name_(nullptr), base_address_(0), arch_(kModuleArchUnknown) { + internal_memset(uuid_, 0, kModuleUUIDSize); + ranges_.clear(); + } void set(const char *module_name, uptr base_address); + void set(const char *module_name, uptr base_address, ModuleArch arch, + u8 uuid[kModuleUUIDSize]); void clear(); void addAddressRange(uptr beg, uptr end, bool executable); bool containsAddress(uptr address) const; const char *full_name() const { return full_name_; } uptr base_address() const { return base_address_; } + ModuleArch arch() const { return arch_; } + const u8 *uuid() const { return uuid_; } struct AddressRange { AddressRange *next; @@ -674,6 +696,8 @@ class LoadedModule { private: char *full_name_; // Owned. uptr base_address_; + ModuleArch arch_; + u8 uuid_[kModuleUUIDSize]; IntrusiveList ranges_; }; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h index 1fe59ab895325..5c26fb77e6866 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h @@ -35,8 +35,9 @@ class MemoryMappingLayout { public: explicit MemoryMappingLayout(bool cache_enabled); ~MemoryMappingLayout(); - bool Next(uptr *start, uptr *end, uptr *offset, - char filename[], uptr filename_size, uptr *protection); + bool Next(uptr *start, uptr *end, uptr *offset, char filename[], + uptr filename_size, uptr *protection, ModuleArch *arch = nullptr, + u8 *uuid = nullptr); void Reset(); // In some cases, e.g. when running under a sandbox on Linux, ASan is unable // to obtain the memory mappings. 
It should fall back to pre-cached data @@ -65,13 +66,15 @@ class MemoryMappingLayout { static ProcSelfMapsBuff cached_proc_self_maps_; static StaticSpinMutex cache_lock_; // protects cached_proc_self_maps_. # elif SANITIZER_MAC - template - bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, - char filename[], uptr filename_size, + template + bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[], + uptr filename_size, ModuleArch *arch, u8 *uuid, uptr *protection); int current_image_; u32 current_magic_; u32 current_filetype_; + ModuleArch current_arch_; + u8 current_uuid_[kModuleUUIDSize]; int current_load_cmd_count_; char *current_load_cmd_addr_; # endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc index 5011b1ff14b21..30216456330ee 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc @@ -50,7 +50,9 @@ void ReadProcMaps(ProcSelfMapsBuff *proc_maps) { bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, char filename[], uptr filename_size, - uptr *protection) { + uptr *protection, ModuleArch *arch, u8 *uuid) { + CHECK(!arch && "not implemented"); + CHECK(!uuid && "not implemented"); char *last = proc_self_maps_.data + proc_self_maps_.len; if (current_ >= last) return false; uptr dummy; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_linux.cc b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_linux.cc index b6fb7034ded4e..fdf85b77a680f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_linux.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_linux.cc @@ -28,7 +28,9 @@ static bool IsOneOf(char c, char c1, char c2) { bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, char filename[], uptr filename_size, - uptr *protection) { + uptr *protection, ModuleArch *arch, u8 *uuid) { + 
CHECK(!arch && "not implemented"); + CHECK(!uuid && "not implemented"); char *last = proc_self_maps_.data + proc_self_maps_.len; if (current_ >= last) return false; uptr dummy; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cc index 417cc908e2478..2b4ad5cbbbf0a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cc @@ -53,6 +53,8 @@ void MemoryMappingLayout::Reset() { current_load_cmd_addr_ = 0; current_magic_ = 0; current_filetype_ = 0; + current_arch_ = kModuleArchUnknown; + internal_memset(current_uuid_, 0, kModuleUUIDSize); } // static @@ -71,11 +73,12 @@ void MemoryMappingLayout::LoadFromCache() { // and returns the start and end addresses and file offset of the corresponding // segment. // Note that the segment addresses are not necessarily sorted. -template -bool MemoryMappingLayout::NextSegmentLoad( - uptr *start, uptr *end, uptr *offset, - char filename[], uptr filename_size, uptr *protection) { - const char* lc = current_load_cmd_addr_; +template +bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset, + char filename[], uptr filename_size, + ModuleArch *arch, u8 *uuid, + uptr *protection) { + const char *lc = current_load_cmd_addr_; current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize; if (((const load_command *)lc)->cmd == kLCSegment) { const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_); @@ -97,14 +100,61 @@ bool MemoryMappingLayout::NextSegmentLoad( internal_strncpy(filename, _dyld_get_image_name(current_image_), filename_size); } + if (arch) { + *arch = current_arch_; + } + if (uuid) { + internal_memcpy(uuid, current_uuid_, kModuleUUIDSize); + } return true; } return false; } +ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) { + cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK; + switch (cputype) { + case 
CPU_TYPE_I386: + return kModuleArchI386; + case CPU_TYPE_X86_64: + if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64; + if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H; + CHECK(0 && "Invalid subtype of x86_64"); + return kModuleArchUnknown; + case CPU_TYPE_ARM: + if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6; + if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7; + if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S; + if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K; + CHECK(0 && "Invalid subtype of ARM"); + return kModuleArchUnknown; + case CPU_TYPE_ARM64: + return kModuleArchARM64; + default: + CHECK(0 && "Invalid CPU type"); + return kModuleArchUnknown; + } +} + +static void FindUUID(const load_command *first_lc, u8 *uuid_output) { + const load_command *current_lc = first_lc; + while (1) { + if (current_lc->cmd == 0) return; + if (current_lc->cmd == LC_UUID) { + const uuid_command *uuid_lc = (const uuid_command *)current_lc; + const uint8_t *uuid = &uuid_lc->uuid[0]; + internal_memcpy(uuid_output, uuid, kModuleUUIDSize); + return; + } + + current_lc = + (const load_command *)(((char *)current_lc) + current_lc->cmdsize); + } +} + bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, char filename[], uptr filename_size, - uptr *protection) { + uptr *protection, ModuleArch *arch, u8 *uuid) { for (; current_image_ >= 0; current_image_--) { const mach_header* hdr = _dyld_get_image_header(current_image_); if (!hdr) continue; @@ -113,6 +163,7 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, current_load_cmd_count_ = hdr->ncmds; current_magic_ = hdr->magic; current_filetype_ = hdr->filetype; + current_arch_ = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype); switch (current_magic_) { #ifdef MH_MAGIC_64 case MH_MAGIC_64: { @@ -130,20 +181,24 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, } } + FindUUID((const 
load_command *)current_load_cmd_addr_, &current_uuid_[0]); + for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) { switch (current_magic_) { // current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64. #ifdef MH_MAGIC_64 case MH_MAGIC_64: { if (NextSegmentLoad( - start, end, offset, filename, filename_size, protection)) + start, end, offset, filename, filename_size, arch, uuid, + protection)) return true; break; } #endif case MH_MAGIC: { if (NextSegmentLoad( - start, end, offset, filename, filename_size, protection)) + start, end, offset, filename, filename_size, arch, uuid, + protection)) return true; break; } @@ -159,9 +214,11 @@ void MemoryMappingLayout::DumpListOfModules( InternalMmapVector *modules) { Reset(); uptr cur_beg, cur_end, prot; + ModuleArch cur_arch; + u8 cur_uuid[kModuleUUIDSize]; InternalScopedString module_name(kMaxPathLength); for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(), - module_name.size(), &prot); + module_name.size(), &prot, &cur_arch, &cur_uuid[0]); i++) { const char *cur_name = module_name.data(); if (cur_name[0] == '\0') @@ -173,7 +230,7 @@ void MemoryMappingLayout::DumpListOfModules( } else { modules->push_back(LoadedModule()); cur_module = &modules->back(); - cur_module->set(cur_name, cur_beg); + cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid); } cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute); } diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc index ae7c5d531ae75..4ac55c706d6c4 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc @@ -52,5 +52,26 @@ TEST(MemoryMappingLayout, DumpListOfModules) { EXPECT_TRUE(found); } +TEST(MemoryMapping, LoadedModuleArchAndUUID) { + if (SANITIZER_MAC) { + MemoryMappingLayout memory_mapping(false); + const uptr kMaxModules = 100; + InternalMmapVector 
modules(kMaxModules); + memory_mapping.DumpListOfModules(&modules); + for (uptr i = 0; i < modules.size(); ++i) { + ModuleArch arch = modules[i].arch(); + // Darwin unit tests are only run on i386/x86_64/x86_64h. + if (SANITIZER_WORDSIZE == 32) { + EXPECT_EQ(arch, kModuleArchI386); + } else if (SANITIZER_WORDSIZE == 64) { + EXPECT_TRUE(arch == kModuleArchX86_64 || arch == kModuleArchX86_64H); + } + const u8 *uuid = modules[i].uuid(); + u8 null_uuid[kModuleUUIDSize] = {0}; + EXPECT_NE(memcmp(null_uuid, uuid, kModuleUUIDSize), 0); + } + } +} + } // namespace __sanitizer #endif // !defined(_WIN32)