New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow the dumping of .dwo files contents to show up when dumping an e… #66726
Conversation
@llvm/pr-subscribers-debuginfo ChangesAllow the dumping of .dwo files contents to show up when dumping an executable with split DWARF. Currently if you run llvm-dwarfdump on a binary that has skeleton compile units, you only see the skeleton compile units. Since the main binary has the linked addresses it would be nice to be able to dump DWARF from the .dwo files and how the resolved addresses instead of showing the address index and "<unresolved>" in the output. This patch adds an option that can be specified to dump the non skeleton DIEs named --dwo. Added the ability to use the following options with split dwarf as well: Full diff: https://github.com/llvm/llvm-project/pull/66726.diff 6 Files Affected:
diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index 9ad27033ec110c2..78ac34e5f0d26c4 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -204,6 +204,7 @@ struct DIDumpOptions {
bool Verbose = false;
bool DisplayRawContents = false;
bool IsEH = false;
+ bool DumpNonSkeleton = false;
std::function<llvm::StringRef(uint64_t DwarfRegNum, bool IsEH)>
GetNameForDWARFReg;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 6461f2ac031d2a0..30afa651ffc23a7 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -35,10 +35,16 @@ void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset())
<< ")\n";
- if (DWARFDie CUDie = getUnitDIE(false))
+ if (DWARFDie CUDie = getUnitDIE(false)) {
CUDie.dump(OS, 0, DumpOpts);
- else
+ if (DumpOpts.DumpNonSkeleton) {
+ DWARFDie NonSkeletonCUDie = getNonSkeletonUnitDIE(false);
+ if (NonSkeletonCUDie && CUDie != NonSkeletonCUDie)
+ NonSkeletonCUDie.dump(OS, 0, DumpOpts);
+ }
+ } else {
OS << "<compile unit can't be parsed!>\n\n";
+ }
}
// VTable anchor.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index a45ed0e56553d4e..c67728ec7bf3448 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1007,9 +1007,17 @@ void DWARFContext::dump(
auto dumpDebugInfo = [&](const char *Name, unit_iterator_range Units) {
OS << '\n' << Name << " contents:\n";
if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugInfo])
- for (const auto &U : Units)
+ for (const auto &U : Units) {
U->getDIEForOffset(*DumpOffset)
.dump(OS, 0, DumpOpts.noImplicitRecursion());
+ DWARFDie CUDie = U->getUnitDIE(false);
+ DWARFDie CUNonSkeletonDie = U->getNonSkeletonUnitDIE(false);
+ if (CUNonSkeletonDie && CUDie != CUNonSkeletonDie) {
+ CUNonSkeletonDie.getDwarfUnit()->getDIEForOffset(*DumpOffset)
+ .dump(OS, 0, DumpOpts.noImplicitRecursion());
+
+ }
+ }
else
for (const auto &U : Units)
U->dump(OS, DumpOpts);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 387345a4ac2d601..f5753b441f7b63f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -751,7 +751,7 @@ void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) {
extractDIEsIfNeeded(false);
if (AddrDieMap.empty())
- updateAddressDieMap(getUnitDIE());
+ updateAddressDieMap(getNonSkeletonUnitDIE(/*ExtractUnitDIEOnly=*/false));
auto R = AddrDieMap.upper_bound(Address);
if (R == AddrDieMap.begin())
return DWARFDie();
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test b/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test
index 3e39591c46dce19..45fbc5b8945568f 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test
+++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test
@@ -15,39 +15,39 @@ RUN: llvm-dwarfdump --statistics statistics-fib.split-dwarf.o | FileCheck %s
# real_fib (int x, int answers[11])
# {
# int result;
-#
+#
# if ((answers)[x] != -1)
# return (answers)[x];
-#
+#
# result = real_fib(x-1, answers) + real_fib(x-2, answers);
# (answers)[x] = result;
-#
+#
# return result;
# }
-#
+#
# int
# fib (int x)
# {
# int answers[11];
# int i;
-#
+#
# if (x > 10)
# return -1;
-#
+#
# for (i = 0; i < 11; i++)
# answers[i] = -1;
-#
+#
# answers[0] = 0;
# answers[1] = 1;
# answers[2] = 1;
-#
+#
# return real_fib(x, answers);
# }
-#
+#
# int main (int argc, char **argv)
# {
# int result;
-#
+#
# result = fib(3);
# printf ("fibonacci(3) = %d\n", result);
# result = fib(4);
@@ -64,7 +64,7 @@ RUN: llvm-dwarfdump --statistics statistics-fib.split-dwarf.o | FileCheck %s
# printf ("fibonacci(9) = %d\n", result);
# result = fib(10);
# printf ("fibonacci(10) = %d\n", result);
-#
+#
# return 0;
# }
#
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 156e10c84dddec9..46724dc571df3e3 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -181,6 +181,14 @@ static opt<bool> FindAllApple(
static opt<bool> IgnoreCase("ignore-case",
desc("Ignore case distinctions when using --name."),
value_desc("i"), cat(DwarfDumpCategory));
+static opt<bool> DumpNonSkeleton("dwo",
+ desc("Dump the non skeleton DIE in the .dwo or "
+ ".dwp file after dumping the skeleton DIE "
+ "from the main executable. This allows "
+ "dumping the .dwo files with resolved "
+ "addresses."),
+ value_desc("d"), cat(DwarfDumpCategory));
+
static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."),
aliasopt(IgnoreCase), cl::NotHidden);
static list<std::string> Name(
@@ -315,6 +323,7 @@ static DIDumpOptions getDumpOpts(DWARFContext &C) {
DumpOpts.ShowForm = ShowForm;
DumpOpts.SummarizeTypes = SummarizeTypes;
DumpOpts.Verbose = Verbose;
+ DumpOpts.DumpNonSkeleton = DumpNonSkeleton;
DumpOpts.RecoverableErrorHandler = C.getRecoverableErrorHandler();
// In -verify mode, print DIEs without children in error messages.
if (Verify) {
@@ -390,7 +399,7 @@ static void filterByName(
const StringSet<> &Names, DWARFContext::unit_iterator_range CUs,
raw_ostream &OS,
std::function<StringRef(uint64_t RegNum, bool IsEH)> GetNameForDWARFReg) {
- for (const auto &CU : CUs)
+ for (const auto &CU : CUs) {
for (const auto &Entry : CU->dies()) {
DWARFDie Die = {CU.get(), &Entry};
if (const char *Name = Die.getName(DINameKind::ShortName))
@@ -399,6 +408,21 @@ static void filterByName(
if (const char *Name = Die.getName(DINameKind::LinkageName))
filterByName(Names, Die, Name, OS, GetNameForDWARFReg);
}
+ // If we have split DWARF, then recurse down into the .dwo files as well.
+ DWARFDie CUDie = CU->getUnitDIE(false);
+ DWARFDie CUNonSkeletonDie = CU->getNonSkeletonUnitDIE(false);
+ if (CUNonSkeletonDie && CUDie != CUNonSkeletonDie) {
+ // We have a DWO file, we need to search it as well
+ for (const auto &Entry : CUNonSkeletonDie.getDwarfUnit()->dies()) {
+ DWARFDie Die = {CUNonSkeletonDie.getDwarfUnit(), &Entry};
+ if (const char *Name = Die.getShortName())
+ if (filterByName(Names, Die, Name, OS, GetNameForDWARFReg))
+ continue;
+ if (const char *Name = Die.getLinkageName())
+ filterByName(Names, Die, Name, OS, GetNameForDWARFReg);
+ }
+ }
+ }
}
static void getDies(DWARFContext &DICtx, const AppleAcceleratorTable &Accel,
|
How do you specify to output DIE at an offset for specific non-skeleton unit? |
Currently you can just use "--debug-info=" and you might see multiple DIEs from multiple .dwo files. I can add a ---dwo-id option to allow only dumping a specific .dwo file. |
right. That is what I was asking. Without it, there will be multiple DIEs being outputted for the same offset. Not sure a flag entry is necessary. Can we just re-use --dwo |
I think this is useful, the only thing I'm confused about is that the test update seems to only modify whitespace? |
Sounds good to me. Could allow a comma separated list that could match against the CU hash or the dwo_name? (so you could do |
I was thinking just adding a
We could also add a
Any of the above could then be combined with the |
And the |
The main question is, should we get this patch in as is and then add the new options? Or just combine it all here. Also from a testing perspective, how can I add a filename + .dwo test that is portable? The DWO name is usually a full path and I am not sure that relative file paths are supported by the llvm stuff? If we are testing this in llvm, then we can't use clang in the tests right? So I would need to either run obj2yaml on the main executable and .dwo file, but then I still end up with problems with the .dwo path being hard coded |
@@ -15,39 +15,39 @@ RUN: llvm-dwarfdump --statistics statistics-fib.split-dwarf.o | FileCheck %s | |||
# real_fib (int x, int answers[11]) | |||
# { | |||
# int result; | |||
# |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bunch of unrelated formatting should be undone here
for (const auto &Entry : CUNonSkeletonDie.getDwarfUnit()->dies()) { | ||
DWARFDie Die = {CUNonSkeletonDie.getDwarfUnit(), &Entry}; | ||
if (const char *Name = Die.getShortName()) | ||
if (filterByName(Names, Die, Name, OS, GetNameForDWARFReg)) | ||
continue; | ||
if (const char *Name = Die.getLinkageName()) | ||
filterByName(Names, Die, Name, OS, GetNameForDWARFReg); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps put this in a lambda and reuse it from both sites, rather than duplicating it from th enon-split case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, I can do that.
I think figuring out the overall design is a good idea then figure out what the implementation strategy is. One argument could be made that a "dump everything" mode might not be what we want (if it is what you want, that's OK - we can do that) I wonder if we didn't do that/didn't add any new flags, and improved the existing search flags
There's lots of existing testing of Split DWARF (for LLVM output, and for llvm-dwarfdump/llvm-symbolizer/etc) - best to take a look at that for inspiration (from a previous/recent discussion, the sort of preference for implementation strategies these days is to use yaml (yaml2obj to create the inputs), assembly (llvm-mc run in the test to assemble it), or LLVM IR in that order of reduced preference/less isolated testing)
Relative paths are supported and used exclusively at Google, for instance - it's the only way to make portable/directory-independent reproducible builds.
Right
If you were generating this from clang, you can use -fdebug-compilation-dir=. to make the paths relative, or you can modify the assembly or yaml after the fact. |
I usually often do want to dump and entire binary since I often make small example binaries to test with, so I like the idea of being able to dump all of the DWARF since the skeleton DIE on its own isn't very useful, and dumping the .dwo file without the main executable is only useful if you are not not looking at anything that needs addresses. I also would like to search be name, by address and by DIE offset, even if that DIE comes from a DWO file. In LLDB the identifier is a uint64_t and many objects in LLDB map back to the DWARF by specifying a uint64_t that contains the DWO index + 32 bit DIE offset. So I often have a function or variable object that has an indentifier that maps back to DWARF using this DWO idx + DIE offset, and I need to find the exact DWARF. So I would vote that we allow a way to dump the main executable and all of its .dwo files so we can see fully linked addresses. "--dwo" takes are of this currently. I also have implemented the main search features I want to use with this patch and hooked it up so it all works with .dwo files. Let me know your thoughts and or suggestions |
if (CUNonSkeletonDie && CUDie != CUNonSkeletonDie) { | ||
CUNonSkeletonDie.getDwarfUnit()->getDIEForOffset(*DumpOffset) | ||
.dump(OS, 0, DumpOpts.noImplicitRecursion()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
extra blank line could be removed
updateAddressDieMap(getUnitDIE()); | ||
updateAddressDieMap(getNonSkeletonUnitDIE(/*ExtractUnitDIEOnly=*/false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems interesting - what motivates this change/could it be separated from the basic "dump the non-dwos" stuff? (getNonSkeletonUnitDIE
unconditionally could cause a lot more DWARF parsing/trying to read all the DIES in non-skeleton units too when they weren't being read before this patch?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was motivated by the `--lookup ' stuff. If we have a skeleton compile unit, wouldn't we always want to try and grab the .dwo file and do a real lookup instead of just failing? Or worse yet, we have the .dwo file and we are doing a lookup, but the compiler option that leaves inlines in the skeleton compile unit has been enabled and you get subpar results.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd err on the side of not searching all the dwos without opting in, since it might be quite expensive/a lot of results or things to search through. In any case, seems best to discuss in a separate review, perhaps.
I worry about whether this codepath affects symbolizing in some way, for instance (which the inline info in the skeleton is meant to help with/avoid having to parse the dwos). Separate patches will let us track regressions more carefully.
Fair enough - happy enough to add
Yeah, be nice if the existing filters could take, maybe a dwo_name+offset format and an unadorned offset referred to the main file only (since applying it to dwos would be ambiguous, the offset might appear in multiple dwos) and you can use dwo_name+offset, or maybe
Yeah, I'd do those as follow-up patches, with some eye towards a common interface design, possibly the |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Ok, all feedback has been fixed. This patch current only does:
Future patches will add the ability to dump specific items by debug info offset in a specific dwo file as @dwblaikie had suggested. |
DWARFDie CUDie = CU->getUnitDIE(false); | ||
DWARFDie CUNonSkeletonDie = CU->getNonSkeletonUnitDIE(false); | ||
// If we have a DWO file, we need to search it as well | ||
if (CUNonSkeletonDie && CUDie != CUNonSkeletonDie) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be conditional on DumpNonSkeleton
too? (might be easier to review these changes separately - start with just the --dwo
flag for plain dumping, then add filtering separately)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
# RUN: cd %T && llvm-dwarfdump --dwo %T/dump_dwo.o | FileCheck %s | ||
# RUN: cd %T && llvm-dwarfdump --name int --name char %T/dump_dwo.o | FileCheck %s --check-prefix=NAMES | ||
# RUN: cd %T && llvm-dwarfdump --lookup 0x10 %T/dump_dwo.o | FileCheck %s --check-prefix=LOOKUP |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Each run line doesn't need its own cd
- can use one up-front. (maybe even before the yaml2obj
lines, then those lines don't need to %T/dump_dwo.o
they can write to dump_dwo.o
, etc, unqualified.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
updateAddressDieMap(getUnitDIE()); | ||
updateAddressDieMap(getNonSkeletonUnitDIE(/*ExtractUnitDIEOnly=*/false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd err on the side of not searching all the dwos without opting in, since it might be quite expensive/a lot of results or things to search through. In any case, seems best to discuss in a separate review, perhaps.
I worry about whether this codepath affects symbolizing in some way, for instance (which the inline info in the skeleton is meant to help with/avoid having to parse the dwos). Separate patches will let us track regressions more carefully.
Everything should be fixed. Now if you specify Same thing for lookups. This allows you to search the skeleton compile unit for an address, or the dwo if you specify Added tests for everything. |
This seems over complicates things. |
@dwblaikie Anyting more that needs to happen for this patch? I think I took care of everything that was asked for. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd still rather this was submitted without any of the support in the filter/lookup operations, and do those in separate commits each. But it's probably fine.
// DWO first to see if we have a match. | ||
DWARFDie CUDie = CU->getUnitDIE(false); | ||
DWARFDie CUDwoDie = CU->getNonSkeletonUnitDIE(false); | ||
if (CheckDWO && CUDwoDie && CUDie != CUDwoDie) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to CheckDWO
again here, as we're inside an if (CheckDWO)
at this point.
…xecutable with split DWARF. Currently if you run llvm-dwarfdump on a binary that has skeleton compile units, you only see the skeleton compile units. Since the main binary has the linked addresses it would be nice to be able to dump DWARF from the .dwo files and how the resolved addresses instead of showing the address index and "<unresolved>" in the output. This patch adds an option that can be specified to dump the non skeleton DIEs named --dwo. Added the ability to use the following options with split dwarf as well: --name <name> --lookup <addr> --debug-info <die-offset>
- Revert changes to `DWARFUnit::getSubroutineForAddress(...)` - Add a "bool CheckDWO" argument to the `DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address)` function to allow the DWO file to be searched first if needed since the DWO file usually has more information. The parameter is defaulted to `false`. - Fix the address lookup calls in llvm-dwarfdump to pass the DumpNonSkeleton bool value to `DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address, bool CheckDWO)` - Add tests that verify --name lookups fail when --dwo is not specified, and succeeds when it is - Add tests that verify --lookup finds info in the skeleton compile unit when --dwo isn't specified and succeeds when it is in finding info from the split compile unit.
Some builds don't enable the x86_64 target, so we sometimes don't see register names in DWARF output.
Some buildbots were failing after this due to x86_64 target not being enabled. Fixed with: commit dc1e279 (HEAD -> main, origin/main, origin/HEAD)
|
…xecutable with split DWARF. (llvm#66726) Allow the dumping of .dwo files contents to show up when dumping an executable with split DWARF. Currently if you run llvm-dwarfdump on a binary that has skeleton compile units, you only see the skeleton compile units. Since the main binary has the linked addresses it would be nice to be able to dump DWARF from the .dwo files and how the resolved addresses instead of showing the address index and "<unresolved>" in the output. This patch adds an option that can be specified to dump the non skeleton DIEs named --dwo. Added the ability to use the following options with split dwarf as well: --name <name> --lookup <addr> --debug-info <die-offset>
Some builds don't enable the x86_64 target, so we sometimes don't see register names in DWARF output.
Allow the dumping of .dwo files contents to show up when dumping an executable with split DWARF.
Currently if you run llvm-dwarfdump on a binary that has skeleton compile units, you only see the skeleton compile units. Since the main binary has the linked addresses it would be nice to be able to dump DWARF from the .dwo files and how the resolved addresses instead of showing the address index and "" in the output. This patch adds an option that can be specified to dump the non skeleton DIEs named --dwo.
Added the ability to use the following options with split dwarf as well:
--name
--lookup
--debug-info