-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[lldb][NFCI] Refactor AppleObjCClassDescriptorV2 method_t parsing functions #163291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| #include "lldb/Utility/LLDBLog.h" | ||
| #include "lldb/Utility/Log.h" | ||
| #include "lldb/lldb-enumerations.h" | ||
| #include "llvm/ADT/Sequence.h" | ||
|
|
||
| using namespace lldb; | ||
| using namespace lldb_private; | ||
|
|
@@ -266,22 +267,47 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process, | |
| return true; | ||
| } | ||
|
|
||
| bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, | ||
| lldb::addr_t relative_selector_base_addr, | ||
| bool is_small, bool has_direct_sel) { | ||
| size_t ptr_size = process->GetAddressByteSize(); | ||
| size_t size = GetSize(process, is_small); | ||
| llvm::SmallVector<ClassDescriptorV2::method_t, 0> | ||
| ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, | ||
| lldb::addr_t relative_selector_base_addr, | ||
| bool is_small, bool has_direct_sel) const { | ||
| lldb_private::Process *process = m_runtime.GetProcess(); | ||
| if (!process) | ||
| return {}; | ||
|
|
||
| DataBufferHeap buffer(size, '\0'); | ||
| Status error; | ||
| const size_t size = method_t::GetSize(process, is_small); | ||
| const size_t num_methods = addresses.size(); | ||
|
|
||
| process->ReadMemory(addr, buffer.GetBytes(), size, error); | ||
| if (error.Fail()) { | ||
| return false; | ||
| llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0); | ||
| llvm::DenseSet<uint32_t> failed_indices; | ||
|
|
||
| for (auto [idx, addr] : llvm::enumerate(addresses)) { | ||
| Status error; | ||
| process->ReadMemory(addr, buffer.data() + idx * size, size, error); | ||
| if (error.Fail()) | ||
| failed_indices.insert(idx); | ||
| } | ||
|
Comment on lines
+284
to
289
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Presumably this is where the multi-read packet will come in?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's why we're doing two passes over the addresses?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, that's correct! |
||
|
|
||
| DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(), | ||
| ptr_size); | ||
| llvm::SmallVector<method_t, 0> methods; | ||
| methods.reserve(num_methods); | ||
| for (auto [idx, addr] : llvm::enumerate(addresses)) { | ||
| if (failed_indices.contains(idx)) | ||
| continue; | ||
| DataExtractor extractor(buffer.data() + idx * size, size, | ||
| process->GetByteOrder(), | ||
| process->GetAddressByteSize()); | ||
| methods.push_back(method_t()); | ||
| methods.back().Read(extractor, process, addr, relative_selector_base_addr, | ||
| is_small, has_direct_sel); | ||
| } | ||
|
|
||
| return methods; | ||
| } | ||
|
|
||
| bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor, | ||
| Process *process, lldb::addr_t addr, | ||
| lldb::addr_t relative_selector_base_addr, | ||
| bool is_small, bool has_direct_sel) { | ||
| lldb::offset_t cursor = 0; | ||
|
|
||
| if (is_small) { | ||
|
|
@@ -291,11 +317,11 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, | |
|
|
||
| m_name_ptr = addr + nameref_offset; | ||
|
|
||
| Status error; | ||
| if (!has_direct_sel) { | ||
| // The SEL offset points to a SELRef. We need to dereference twice. | ||
| m_name_ptr = process->ReadUnsignedIntegerFromMemory(m_name_ptr, ptr_size, | ||
| 0, error); | ||
| if (!error.Success()) | ||
| m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error); | ||
| if (error.Fail()) | ||
| return false; | ||
| } else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) { | ||
| m_name_ptr = relative_selector_base_addr + nameref_offset; | ||
|
|
@@ -308,13 +334,13 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, | |
| m_imp_ptr = extractor.GetAddress_unchecked(&cursor); | ||
| } | ||
|
|
||
| Status error; | ||
| process->ReadCStringFromMemory(m_name_ptr, m_name, error); | ||
| if (error.Fail()) { | ||
| if (error.Fail()) | ||
| return false; | ||
| } | ||
|
|
||
| process->ReadCStringFromMemory(m_types_ptr, m_types, error); | ||
| return !error.Fail(); | ||
| return error.Success(); | ||
| } | ||
|
|
||
| bool ClassDescriptorV2::ivar_list_t::Read(Process *process, lldb::addr_t addr) { | ||
|
|
@@ -447,17 +473,19 @@ ClassDescriptorV2::GetMethodList(Process *process, | |
| bool ClassDescriptorV2::ProcessMethodList( | ||
| std::function<bool(const char *, const char *)> const &instance_method_func, | ||
| ClassDescriptorV2::method_list_t &method_list) const { | ||
| lldb_private::Process *process = m_runtime.GetProcess(); | ||
| auto method = std::make_unique<method_t>(); | ||
| lldb::addr_t relative_selector_base_addr = | ||
| m_runtime.GetRelativeSelectorBaseAddr(); | ||
| for (uint32_t i = 0, e = method_list.m_count; i < e; ++i) { | ||
| method->Read(process, method_list.m_first_ptr + (i * method_list.m_entsize), | ||
| relative_selector_base_addr, method_list.m_is_small, | ||
| method_list.m_has_direct_selector); | ||
| if (instance_method_func(method->m_name.c_str(), method->m_types.c_str())) | ||
| auto idx_to_method_addr = [&](uint32_t idx) { | ||
| return method_list.m_first_ptr + (idx * method_list.m_entsize); | ||
| }; | ||
| llvm::SmallVector<addr_t> addresses = llvm::to_vector(llvm::map_range( | ||
| llvm::seq<uint32_t>(method_list.m_count), idx_to_method_addr)); | ||
|
|
||
| llvm::SmallVector<method_t, 0> methods = | ||
| ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(), | ||
| method_list.m_is_small, method_list.m_has_direct_selector); | ||
|
|
||
| for (const auto &method : methods) | ||
| if (instance_method_func(method.m_name.c_str(), method.m_types.c_str())) | ||
| break; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[begin bikeshedding]
Is
SmallVectorworth it when returning by value? If not, should we take an out parameter and have a sane default for the stack allocated number of elements? Either way, I think it would be helpful to make this a typedef so you don't have to think about the number of elements in the template.[end bikeshedding]
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A
SmallVector<T, 0>is supposed to be the exact same size as anstd::vector, but with a more llvm-friendly API.Because the small size is set to zero here, the programmer is expressing something: this is expected to always contain a lot of elements.
Doing this is doing what the compiler already does for us based on most ABIs: https://godbolt.org/z/9Yacveo4o
I'm generally opposed to this kind of typedef, it adds overhead when reading code, as readers always have to go lookup what the typedef this, to then just find out "oh, this is just a vector, why is this a typedef?!".
I understand it can be annoying to have to spell out the SmallSize, but it really is a small price and there is an expressed intent with this (as mentioned above) (also worth noting that a codebase more friendly to
autowould not have that issue 👀 )There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't talking about RVO, I meant taking a
SmallVectorImpland letting the caller decide what amount of elements it expects. But that only makes sense if there's a meaningful value. If we knew that most Objective-C classes have less than say 8 methods, that would make the SmallVector an even better choice. It sounds like here you're just doing it for the llvm interface (which is a good enough reason in and by itself).The argument in favor is that it's abstracting it away on purpose: you decided that the vector "always contain a lot of elements" or at least an unpredictable number of elements (that's how I read that
0) and so you don't have to worry about. It's more about not having to think about the size rather than having to spell it out.This sounds contradictory to your previous statement: unless you have an IDE that show the types of
autoinline, this adds the same amount of (if not more) overhead that you seem to be arguing against.Anyway, I don't feel strongly. I would have made it a typedef. I won't stand in the way if you don't.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I will keep this one as is because I think the API is communicating something useful ("this is always big"), I will give the type alias a try on a different PR, which I'll tag you on.