Skip to content

Commit

Permalink
[sanitizer] Track architecture and UUID of modules in LoadedModule
Browse files Browse the repository at this point in the history
When we enumerate loaded modules, we only track the module name and base address, which causes several problems on macOS. Dylibs and executables often have several architecture slices, and not storing which architecture/UUID is actually loaded creates problems with symbolication: a file path + offset isn't enough to symbolicate correctly, since the offset can be valid in multiple slices. This is especially common on Haswell+ x86_64 machines, where x86_64h slices are preferred, but if one is not available, a regular x86_64 slice is loaded instead. The same issue exists for i386 vs. x86_64 as well.

This patch adds tracking of arch and UUID for each LoadedModule. At this point, this information isn't used in reports, but this is the first step. The goal is to correctly identify which slice is loaded in symbolication, and also to output this information in reports so that we can tell which exact slices were loaded in post-mortem analysis.

Differential Revision: https://reviews.llvm.org/D26632

llvm-svn: 288537
  • Loading branch information
kubamracek committed Dec 2, 2016
1 parent 96be8df commit 3eb98a1
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 18 deletions.
9 changes: 9 additions & 0 deletions compiler-rt/lib/sanitizer_common/sanitizer_common.cc
Expand Up @@ -259,9 +259,18 @@ void LoadedModule::set(const char *module_name, uptr base_address) {
base_address_ = base_address;
}

void LoadedModule::set(const char *module_name, uptr base_address,
ModuleArch arch, u8 uuid[kModuleUUIDSize]) {
set(module_name, base_address);
arch_ = arch;
internal_memcpy(uuid_, uuid, sizeof(uuid_));
}

void LoadedModule::clear() {
InternalFree(full_name_);
full_name_ = nullptr;
arch_ = kModuleArchUnknown;
internal_memset(uuid_, 0, kModuleUUIDSize);
while (!ranges_.empty()) {
AddressRange *r = ranges_.front();
ranges_.pop_front();
Expand Down
26 changes: 25 additions & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_common.h
Expand Up @@ -646,18 +646,40 @@ uptr InternalLowerBound(const Container &v, uptr first, uptr last,
return first;
}

// Identifies which architecture slice of a module is loaded.
enum ModuleArch {
// Value held by a LoadedModule before an architecture is set and after
// clear().
kModuleArchUnknown,
kModuleArchI386,
kModuleArchX86_64,
// The x86_64h slice, which is preferred over plain x86_64 on Haswell+
// machines when it is available.
kModuleArchX86_64H,
kModuleArchARMV6,
kModuleArchARMV7,
kModuleArchARMV7S,
kModuleArchARMV7K,
kModuleArchARM64
};

// Number of u8 elements in a module UUID buffer; used as the length for the
// internal_memset/internal_memcpy calls that zero and copy UUIDs.
const uptr kModuleUUIDSize = 16;

// Represents a binary loaded into virtual memory (e.g. this can be an
// executable or a shared object).
class LoadedModule {
public:
LoadedModule() : full_name_(nullptr), base_address_(0) { ranges_.clear(); }
LoadedModule()
: full_name_(nullptr), base_address_(0), arch_(kModuleArchUnknown) {
internal_memset(uuid_, 0, kModuleUUIDSize);
ranges_.clear();
}
void set(const char *module_name, uptr base_address);
void set(const char *module_name, uptr base_address, ModuleArch arch,
u8 uuid[kModuleUUIDSize]);
void clear();
void addAddressRange(uptr beg, uptr end, bool executable);
bool containsAddress(uptr address) const;

const char *full_name() const { return full_name_; }
uptr base_address() const { return base_address_; }
ModuleArch arch() const { return arch_; }
const u8 *uuid() const { return uuid_; }

struct AddressRange {
AddressRange *next;
Expand All @@ -674,6 +696,8 @@ class LoadedModule {
private:
char *full_name_; // Owned.
uptr base_address_;
ModuleArch arch_;
u8 uuid_[kModuleUUIDSize];
IntrusiveList<AddressRange> ranges_;
};

Expand Down
13 changes: 8 additions & 5 deletions compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
Expand Up @@ -35,8 +35,9 @@ class MemoryMappingLayout {
public:
explicit MemoryMappingLayout(bool cache_enabled);
~MemoryMappingLayout();
bool Next(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size, uptr *protection);
bool Next(uptr *start, uptr *end, uptr *offset, char filename[],
uptr filename_size, uptr *protection, ModuleArch *arch = nullptr,
u8 *uuid = nullptr);
void Reset();
// In some cases, e.g. when running under a sandbox on Linux, ASan is unable
// to obtain the memory mappings. It should fall back to pre-cached data
Expand Down Expand Up @@ -65,13 +66,15 @@ class MemoryMappingLayout {
static ProcSelfMapsBuff cached_proc_self_maps_;
static StaticSpinMutex cache_lock_; // protects cached_proc_self_maps_.
# elif SANITIZER_MAC
template<u32 kLCSegment, typename SegmentCommand>
bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size,
template <u32 kLCSegment, typename SegmentCommand>
bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[],
uptr filename_size, ModuleArch *arch, u8 *uuid,
uptr *protection);
int current_image_;
u32 current_magic_;
u32 current_filetype_;
ModuleArch current_arch_;
u8 current_uuid_[kModuleUUIDSize];
int current_load_cmd_count_;
char *current_load_cmd_addr_;
# endif
Expand Down
Expand Up @@ -50,7 +50,9 @@ void ReadProcMaps(ProcSelfMapsBuff *proc_maps) {

bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size,
uptr *protection) {
uptr *protection, ModuleArch *arch, u8 *uuid) {
CHECK(!arch && "not implemented");
CHECK(!uuid && "not implemented");
char *last = proc_self_maps_.data + proc_self_maps_.len;
if (current_ >= last) return false;
uptr dummy;
Expand Down
4 changes: 3 additions & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_procmaps_linux.cc
Expand Up @@ -28,7 +28,9 @@ static bool IsOneOf(char c, char c1, char c2) {

bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size,
uptr *protection) {
uptr *protection, ModuleArch *arch, u8 *uuid) {
CHECK(!arch && "not implemented");
CHECK(!uuid && "not implemented");
char *last = proc_self_maps_.data + proc_self_maps_.len;
if (current_ >= last) return false;
uptr dummy;
Expand Down
77 changes: 67 additions & 10 deletions compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cc
Expand Up @@ -53,6 +53,8 @@ void MemoryMappingLayout::Reset() {
current_load_cmd_addr_ = 0;
current_magic_ = 0;
current_filetype_ = 0;
current_arch_ = kModuleArchUnknown;
internal_memset(current_uuid_, 0, kModuleUUIDSize);
}

// static
Expand All @@ -71,11 +73,12 @@ void MemoryMappingLayout::LoadFromCache() {
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template<u32 kLCSegment, typename SegmentCommand>
bool MemoryMappingLayout::NextSegmentLoad(
uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size, uptr *protection) {
const char* lc = current_load_cmd_addr_;
template <u32 kLCSegment, typename SegmentCommand>
bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size,
ModuleArch *arch, u8 *uuid,
uptr *protection) {
const char *lc = current_load_cmd_addr_;
current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize;
if (((const load_command *)lc)->cmd == kLCSegment) {
const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_);
Expand All @@ -97,14 +100,61 @@ bool MemoryMappingLayout::NextSegmentLoad(
internal_strncpy(filename, _dyld_get_image_name(current_image_),
filename_size);
}
if (arch) {
*arch = current_arch_;
}
if (uuid) {
internal_memcpy(uuid, current_uuid_, kModuleUUIDSize);
}
return true;
}
return false;
}

// Translates a Mach-O (cputype, cpusubtype) pair into the matching ModuleArch.
// The bits covered by CPU_SUBTYPE_MASK are cleared from the subtype before it
// is compared. An unrecognized CPU type, or an unrecognized subtype of x86_64
// or ARM, trips a CHECK.
ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  const cpu_subtype_t masked_subtype = cpusubtype & ~CPU_SUBTYPE_MASK;

  // These CPU types map to a single architecture regardless of the subtype.
  if (cputype == CPU_TYPE_I386) return kModuleArchI386;
  if (cputype == CPU_TYPE_ARM64) return kModuleArchARM64;

  if (cputype == CPU_TYPE_X86_64) {
    if (masked_subtype == CPU_SUBTYPE_X86_64_ALL) {
      return kModuleArchX86_64;
    } else if (masked_subtype == CPU_SUBTYPE_X86_64_H) {
      return kModuleArchX86_64H;
    }
    CHECK(0 && "Invalid subtype of x86_64");
    return kModuleArchUnknown;
  }

  if (cputype == CPU_TYPE_ARM) {
    if (masked_subtype == CPU_SUBTYPE_ARM_V6) {
      return kModuleArchARMV6;
    } else if (masked_subtype == CPU_SUBTYPE_ARM_V7) {
      return kModuleArchARMV7;
    } else if (masked_subtype == CPU_SUBTYPE_ARM_V7S) {
      return kModuleArchARMV7S;
    } else if (masked_subtype == CPU_SUBTYPE_ARM_V7K) {
      return kModuleArchARMV7K;
    }
    CHECK(0 && "Invalid subtype of ARM");
    return kModuleArchUnknown;
  }

  CHECK(0 && "Invalid CPU type");
  return kModuleArchUnknown;
}

static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
const load_command *current_lc = first_lc;
while (1) {
if (current_lc->cmd == 0) return;
if (current_lc->cmd == LC_UUID) {
const uuid_command *uuid_lc = (const uuid_command *)current_lc;
const uint8_t *uuid = &uuid_lc->uuid[0];
internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
return;
}

current_lc =
(const load_command *)(((char *)current_lc) + current_lc->cmdsize);
}
}

bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
char filename[], uptr filename_size,
uptr *protection) {
uptr *protection, ModuleArch *arch, u8 *uuid) {
for (; current_image_ >= 0; current_image_--) {
const mach_header* hdr = _dyld_get_image_header(current_image_);
if (!hdr) continue;
Expand All @@ -113,6 +163,7 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
current_load_cmd_count_ = hdr->ncmds;
current_magic_ = hdr->magic;
current_filetype_ = hdr->filetype;
current_arch_ = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
switch (current_magic_) {
#ifdef MH_MAGIC_64
case MH_MAGIC_64: {
Expand All @@ -130,20 +181,24 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
}
}

FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);

for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) {
switch (current_magic_) {
// current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
case MH_MAGIC_64: {
if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
start, end, offset, filename, filename_size, protection))
start, end, offset, filename, filename_size, arch, uuid,
protection))
return true;
break;
}
#endif
case MH_MAGIC: {
if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
start, end, offset, filename, filename_size, protection))
start, end, offset, filename, filename_size, arch, uuid,
protection))
return true;
break;
}
Expand All @@ -159,9 +214,11 @@ void MemoryMappingLayout::DumpListOfModules(
InternalMmapVector<LoadedModule> *modules) {
Reset();
uptr cur_beg, cur_end, prot;
ModuleArch cur_arch;
u8 cur_uuid[kModuleUUIDSize];
InternalScopedString module_name(kMaxPathLength);
for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(),
module_name.size(), &prot);
module_name.size(), &prot, &cur_arch, &cur_uuid[0]);
i++) {
const char *cur_name = module_name.data();
if (cur_name[0] == '\0')
Expand All @@ -173,7 +230,7 @@ void MemoryMappingLayout::DumpListOfModules(
} else {
modules->push_back(LoadedModule());
cur_module = &modules->back();
cur_module->set(cur_name, cur_beg);
cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid);
}
cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
}
Expand Down
21 changes: 21 additions & 0 deletions compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc
Expand Up @@ -52,5 +52,26 @@ TEST(MemoryMappingLayout, DumpListOfModules) {
EXPECT_TRUE(found);
}

// Checks that every module reported by DumpListOfModules() on Darwin carries
// an architecture matching the word size of the test binary and a UUID that
// is not all zeros. On other platforms the test body does nothing.
TEST(MemoryMapping, LoadedModuleArchAndUUID) {
  if (!SANITIZER_MAC) return;

  MemoryMappingLayout memory_mapping(false);
  const uptr kMaxModules = 100;
  InternalMmapVector<LoadedModule> modules(kMaxModules);
  memory_mapping.DumpListOfModules(&modules);

  // Reference value: the UUID a module holds when none has been recorded.
  const u8 null_uuid[kModuleUUIDSize] = {0};

  for (uptr i = 0; i < modules.size(); ++i) {
    const ModuleArch arch = modules[i].arch();
    // Darwin unit tests are only run on i386/x86_64/x86_64h.
    if (SANITIZER_WORDSIZE == 32) {
      EXPECT_EQ(arch, kModuleArchI386);
    } else if (SANITIZER_WORDSIZE == 64) {
      EXPECT_TRUE(arch == kModuleArchX86_64 || arch == kModuleArchX86_64H);
    }

    const u8 *uuid = modules[i].uuid();
    EXPECT_NE(memcmp(null_uuid, uuid, kModuleUUIDSize), 0);

    // Each LoadedModule owns its name buffer, which is only freed by clear().
    // This must come after the checks above because clear() also resets the
    // arch and zeroes the UUID.
    modules[i].clear();
  }
}

} // namespace __sanitizer
#endif // !defined(_WIN32)

0 comments on commit 3eb98a1

Please sign in to comment.