From 1eb9a0297cb639323e97ca7f02123b035bf0b24b Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 4 May 2017 23:53:29 +0000 Subject: [PATCH] [PDB] Don't build the entire source file list up front. I tried to run llvm-pdbdump on a very large (~1.5GB) PDB to try and identify show-stopping performance problems. This patch addresses the first such problem. When loading the DBI stream, before anyone has even tried to access a single record, we build an in-memory map of every source file for every module. In the particular PDB I was using, this was over 85 million files. Specifically, the complexity is O(m*n) where m is the number of modules and n is the average number of source files (including headers) per module. The whole reason for doing this was so that we could have constant time access to any module and any of its source file lists. However, we can still get O(1) access to the source file list for a given module with a simple O(m) precomputation, and access to the list of modules is already O(1) anyway. So this patch reduces the O(m*n) up-front precomputation to an O(m) one, where n is ~6,500 and n*m is about 85 million in my pathological test case. 
Differential Revision: https://reviews.llvm.org/D32870 llvm-svn: 302205 --- .../PDB/Native/DbiModuleDescriptor.h | 8 - .../llvm/DebugInfo/PDB/Native/DbiModuleList.h | 114 ++++++++ .../llvm/DebugInfo/PDB/Native/DbiStream.h | 13 +- .../PDB/Native/NativeCompilandSymbol.h | 4 +- .../DebugInfo/PDB/Native/NativeEnumModules.h | 8 +- .../llvm/DebugInfo/PDB/Native/RawTypes.h | 11 +- llvm/include/llvm/Support/BinaryStreamArray.h | 4 + llvm/lib/DebugInfo/PDB/CMakeLists.txt | 1 + .../DebugInfo/PDB/Native/DbiModuleList.cpp | 273 ++++++++++++++++++ llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp | 112 +------ .../PDB/Native/NativeCompilandSymbol.cpp | 8 +- .../PDB/Native/NativeEnumModules.cpp | 15 +- .../DebugInfo/PDB/Native/NativeExeSymbol.cpp | 2 +- llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp | 49 ++-- llvm/tools/llvm-pdbdump/StreamUtil.cpp | 13 +- llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp | 19 +- 16 files changed, 473 insertions(+), 181 deletions(-) create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h create mode 100644 llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index d1f791b9daede..7e77f5a3eef92 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -53,14 +53,6 @@ class DbiModuleDescriptor { const ModuleInfoHeader *Layout = nullptr; }; -struct ModuleInfoEx { - ModuleInfoEx(const DbiModuleDescriptor &Info) : Info(Info) {} - ModuleInfoEx(const ModuleInfoEx &Ex) = default; - - DbiModuleDescriptor Info; - std::vector SourceFiles; -}; - } // end namespace pdb template <> struct VarStreamArrayExtractor { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h new file mode 100644 index 0000000000000..bcf1cff8f6e5d --- /dev/null +++ 
b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h @@ -0,0 +1,114 @@ +//===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H +#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace pdb { + +class DbiModuleList; +struct FileInfoSubstreamHeader; + +class DbiModuleSourceFilesIterator + : public iterator_facade_base { + typedef iterator_facade_base + BaseType; + +public: + DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi, + uint16_t Filei); + DbiModuleSourceFilesIterator() = default; + DbiModuleSourceFilesIterator & + operator=(const DbiModuleSourceFilesIterator &R) = default; + + bool operator==(const DbiModuleSourceFilesIterator &R) const; + + const StringRef &operator*() const { return ThisValue; } + StringRef &operator*() { return ThisValue; } + + bool operator<(const DbiModuleSourceFilesIterator &RHS) const; + std::ptrdiff_t operator-(const DbiModuleSourceFilesIterator &R) const; + DbiModuleSourceFilesIterator &operator+=(std::ptrdiff_t N); + DbiModuleSourceFilesIterator &operator-=(std::ptrdiff_t N); + +private: + void setValue(); + + bool isEnd() const; + bool isCompatible(const DbiModuleSourceFilesIterator &R) const; + bool isUniversalEnd() const; + + StringRef ThisValue; + const DbiModuleList *Modules{nullptr}; + uint32_t Modi{0}; + uint16_t Filei{0}; +}; + +class DbiModuleList { + friend 
DbiModuleSourceFilesIterator; + +public: + Error initialize(BinaryStreamRef ModInfo, BinaryStreamRef FileInfo); + + Expected getFileName(uint32_t Index) const; + uint32_t getModuleCount() const; + uint32_t getSourceFileCount() const; + uint16_t getSourceFileCount(uint32_t Modi) const; + + iterator_range + source_files(uint32_t Modi) const; + + DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const; + +private: + Error initializeModInfo(BinaryStreamRef ModInfo); + Error initializeFileInfo(BinaryStreamRef FileInfo); + + VarStreamArray Descriptors; + + FixedStreamArray FileNameOffsets; + FixedStreamArray ModFileCountArray; + + // For each module, there are multiple filenames, which can be obtained by + // knowing the index of the file. Given the index of the file, one can use + // that as an offset into the FileNameOffsets array, which contains the + // absolute offset of the file name in NamesBuffer. Thus, for each module + // we store the first index in the FileNameOffsets array for this module. + // The number of files for the corresponding module is stored in + // ModFileCountArray. + std::vector ModuleInitialFileIndex; + + // In order to provide random access into the Descriptors array, we iterate it + // once up front to find the offsets of the individual items and store them in + // this array. 
+ std::vector ModuleDescriptorOffsets; + + const FileInfoSubstreamHeader *FileInfoHeader = nullptr; + + BinaryStreamRef ModInfoSubstream; + BinaryStreamRef FileInfoSubstream; + BinaryStreamRef NamesBuffer; +}; +} +} + +#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H \ No newline at end of file diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 08262e47f77f5..8f95481f41521 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -68,9 +69,7 @@ class DbiStream { /// not present, returns InvalidStreamIndex. 
uint32_t getDebugStreamIndex(DbgHeaderType Type) const; - ArrayRef modules() const; - - Expected getFileNameForIndex(uint32_t Index) const; + const DbiModuleList &modules() const; FixedStreamArray getSectionHeaders(); @@ -80,27 +79,22 @@ class DbiStream { void visitSectionContributions(ISectionContribVisitor &Visitor) const; private: - Error initializeModInfoArray(); Error initializeSectionContributionData(); Error initializeSectionHeadersData(); Error initializeSectionMapData(); - Error initializeFileInfo(); Error initializeFpoRecords(); PDBFile &Pdb; std::unique_ptr Stream; - std::vector ModuleInfos; PDBStringTable ECNames; - BinaryStreamRef ModInfoSubstream; BinaryStreamRef SecContrSubstream; BinaryStreamRef SecMapSubstream; - BinaryStreamRef FileInfoSubstream; BinaryStreamRef TypeServerMapSubstream; BinaryStreamRef ECSubstream; - BinaryStreamRef NamesBuffer; + DbiModuleList Modules; FixedStreamArray DbgStreams; @@ -108,7 +102,6 @@ class DbiStream { FixedStreamArray SectionContribs; FixedStreamArray SectionContribs2; FixedStreamArray SectionMap; - FixedStreamArray FileNameOffsets; std::unique_ptr SectionHeaderStream; FixedStreamArray SectionHeaders; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h index b1d980679a455..22ed61910d94a 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h @@ -18,7 +18,7 @@ namespace pdb { class NativeCompilandSymbol : public NativeRawSymbol { public: - NativeCompilandSymbol(NativeSession &Session, const ModuleInfoEx &MI); + NativeCompilandSymbol(NativeSession &Session, DbiModuleDescriptor MI); PDB_SymType getSymTag() const override; bool isEditAndContinueEnabled() const override; uint32_t getLexicalParentId() const override; @@ -26,7 +26,7 @@ class NativeCompilandSymbol : public NativeRawSymbol { std::string getName() const override; private: - 
ModuleInfoEx Module; + DbiModuleDescriptor Module; }; } // namespace pdb diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h index 18022f599bbaf..6aa1460dbb4e6 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h @@ -16,13 +16,13 @@ namespace llvm { namespace pdb { +class DbiModuleList; class NativeSession; class NativeEnumModules : public IPDBEnumChildren { public: - explicit NativeEnumModules(NativeSession &Session, - ArrayRef Modules, - uint32_t Index = 0); + NativeEnumModules(NativeSession &Session, const DbiModuleList &Modules, + uint32_t Index = 0); uint32_t getChildCount() const override; std::unique_ptr getChildAtIndex(uint32_t Index) const override; @@ -32,7 +32,7 @@ class NativeEnumModules : public IPDBEnumChildren { private: NativeSession &Session; - ArrayRef Modules; + const DbiModuleList &Modules; uint32_t Index; }; } diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h index 93622d0a43949..979b8454dd5ec 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -211,7 +211,7 @@ struct ModInfoFlags { }; /// The header preceeding each entry in the Module Info substream of the DBI -/// stream. +/// stream. Corresponds to the type MODI in the reference implementation. struct ModuleInfoHeader { /// Currently opened module. This field is a pointer in the reference /// implementation, but that won't work on 64-bit systems, and anyway it @@ -243,9 +243,12 @@ struct ModuleInfoHeader { /// Padding so the next field is 4-byte aligned. char Padding1[2]; - /// Array of [0..NumFiles) DBI name buffer offsets. This field is a pointer - /// in the reference implementation, but as with `Mod`, we ignore it for now - /// since it is unused. 
+ /// Array of [0..NumFiles) DBI name buffer offsets. In the reference + /// implementation this field is a pointer. But since you can't portably + /// serialize a pointer, on 64-bit platforms they copy all the values except + /// this one into the 32-bit version of the struct and use that for + /// serialization. Regardless, this field is unused, it is only there to + /// store a pointer that can be accessed at runtime. support::ulittle32_t FileNameOffs; /// Name Index for src file name diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h index f141c30f16c7b..c36e5da72d542 100644 --- a/llvm/include/llvm/Support/BinaryStreamArray.h +++ b/llvm/include/llvm/Support/BinaryStreamArray.h @@ -115,6 +115,7 @@ class VarStreamArrayIterator for (unsigned I = 0; I < N; ++I) { // We are done with the current record, discard it so that we are // positioned at the next record. + AbsOffset += ThisLen; IterRef = IterRef.drop_front(ThisLen); if (IterRef.getLength() == 0) { // There is nothing after the current record, we must make this an end @@ -135,6 +136,8 @@ class VarStreamArrayIterator return *this; } + uint32_t offset() const { return AbsOffset; } + private: void moveToEnd() { Array = nullptr; @@ -152,6 +155,7 @@ class VarStreamArrayIterator const WrappedCtx *Ctx{nullptr}; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; + uint32_t AbsOffset{0}; bool HasError{false}; bool *HadError{nullptr}; }; diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index e1753018c7df3..e9fd29ccc4caf 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -30,6 +30,7 @@ endif() add_pdb_impl_folder(Native Native/DbiModuleDescriptor.cpp Native/DbiModuleDescriptorBuilder.cpp + Native/DbiModuleList.cpp Native/DbiStream.cpp Native/DbiStreamBuilder.cpp Native/EnumTables.cpp diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp 
b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
new file mode 100644
index 0000000000000..434f775097e04
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -0,0 +1,307 @@
+//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+// Positions the iterator on file `Filei` of module `Modi` and caches its name.
+DbiModuleSourceFilesIterator::DbiModuleSourceFilesIterator(
+    const DbiModuleList &Modules, uint32_t Modi, uint16_t Filei)
+    : Modules(&Modules), Modi(Modi), Filei(Filei) {
+  setValue();
+}
+
+bool DbiModuleSourceFilesIterator::
+operator==(const DbiModuleSourceFilesIterator &R) const {
+  // incompatible iterators are never equal
+  if (!isCompatible(R))
+    return false;
+
+  // If they're compatible, and they're both ends, then they're equal.
+  if (isEnd() && R.isEnd())
+    return true;
+
+  // If one is an end and the other is not, they're not equal.
+  if (isEnd() != R.isEnd())
+    return false;
+
+  // Now we know:
+  // - They're compatible
+  // - They're not *both* end iterators
+  // - Their endness is the same.
+  // Thus, they're compatible iterators pointing to a valid file on the same
+  // module.  All we need to check are the file indices.
+  assert(Modules == R.Modules);
+  assert(Modi == R.Modi);
+  assert(!isEnd());
+  assert(!R.isEnd());
+
+  return (Filei == R.Filei);
+}
+
+bool DbiModuleSourceFilesIterator::
+operator<(const DbiModuleSourceFilesIterator &R) const {
+  assert(isCompatible(R));
+
+  // It's not sufficient to compare the file indices, because default
+  // constructed iterators could be equal to iterators with valid indices.  To
+  // account for this, early-out if they're equal.
+  if (*this == R)
+    return false;
+
+  return Filei < R.Filei;
+}
+
+std::ptrdiff_t DbiModuleSourceFilesIterator::
+operator-(const DbiModuleSourceFilesIterator &R) const {
+  assert(isCompatible(R));
+  assert(!(*this < R));
+
+  // If they're both end iterators, the distance is 0.
+  if (isEnd() && R.isEnd())
+    return 0;
+
+  assert(!R.isEnd());
+
+  // At this point, R cannot be end, but *this can, which means that *this
+  // might be a universal end iterator with none of its fields set.  So in that
+  // case have to rely on R as the authority to figure out how many files there
+  // are to compute the distance.
+  uint32_t Thisi = Filei;
+  if (isEnd()) {
+    uint32_t RealModi = R.Modi;
+    Thisi = R.Modules->getSourceFileCount(RealModi);
+  }
+
+  assert(Thisi >= R.Filei);
+  return Thisi - R.Filei;
+}
+
+DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator::
+operator+=(std::ptrdiff_t N) {
+  assert(!isEnd());
+
+  Filei += N;
+  assert(Filei <= Modules->getSourceFileCount(Modi));
+  setValue();
+  return *this;
+}
+
+DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator::
+operator-=(std::ptrdiff_t N) {
+  // Note that we can subtract from an end iterator, but not a universal end
+  // iterator.
+  assert(!isUniversalEnd());
+
+  assert(N <= Filei);
+
+  Filei -= N;
+  // Refresh the cached name for the new position, as operator+= does.
+  setValue();
+  return *this;
+}
+
+// Caches the name of the current file in ThisValue, or "" at the end.
+void DbiModuleSourceFilesIterator::setValue() {
+  if (isEnd()) {
+    ThisValue = "";
+    return;
+  }
+
+  uint32_t Off = Modules->ModuleInitialFileIndex[Modi] + Filei;
+  auto ExpectedValue = Modules->getFileName(Off);
+  if (!ExpectedValue) {
+    // An unreadable name ends the iteration; don't keep a stale name around.
+    consumeError(ExpectedValue.takeError());
+    Filei = Modules->getSourceFileCount(Modi);
+    ThisValue = "";
+  } else
+    ThisValue = *ExpectedValue;
+}
+
+// True for a universal end iterator or one positioned past the last file.
+bool DbiModuleSourceFilesIterator::isEnd() const {
+  if (isUniversalEnd())
+    return true;
+
+  assert(Modules);
+  assert(Modi <= Modules->getModuleCount());
+
+  // Check the module index first: when it is one past the last module there
+  // is no per-module file count to compare against.
+  if (Modi == Modules->getModuleCount())
+    return true;
+
+  assert(Filei <= Modules->getSourceFileCount(Modi));
+  return Filei == Modules->getSourceFileCount(Modi);
+}
+
+// An iterator without a module list is an end iterator for every module.
+bool DbiModuleSourceFilesIterator::isUniversalEnd() const { return !Modules; }
+
+bool DbiModuleSourceFilesIterator::isCompatible(
+    const DbiModuleSourceFilesIterator &R) const {
+  // Universal iterators are compatible with any other iterator.
+  if (isUniversalEnd() || R.isUniversalEnd())
+    return true;
+
+  // At this point, neither iterator is a universal end iterator, although one
+  // or both might be non-universal end iterators.  Regardless, the module index
+  // is valid, so they are compatible if and only if they refer to the same
+  // module.
+  return Modi == R.Modi;
+}
+
+// Loads the module descriptors, then the per-module source file tables.
+Error DbiModuleList::initialize(BinaryStreamRef ModInfo,
+                                BinaryStreamRef FileInfo) {
+  if (auto EC = initializeModInfo(ModInfo))
+    return EC;
+  if (auto EC = initializeFileInfo(FileInfo))
+    return EC;
+
+  return Error::success();
+}
+
+// Reads the variable-length module descriptor array from `ModInfo`.
+Error DbiModuleList::initializeModInfo(BinaryStreamRef ModInfo) {
+  ModInfoSubstream = ModInfo;
+
+  if (ModInfo.getLength() == 0)
+    return Error::success();
+
+  BinaryStreamReader Reader(ModInfo);
+
+  if (auto EC = Reader.readArray(Descriptors, ModInfo.getLength()))
+    return EC;
+
+  return Error::success();
+}
+
+// Reads the FileInfo substream and precomputes, for each module, its first
+// file index and the offset of its descriptor.  Fails with corrupt_file when
+// the module count disagrees with the descriptor list.
+Error DbiModuleList::initializeFileInfo(BinaryStreamRef FileInfo) {
+  FileInfoSubstream = FileInfo;
+
+  if (FileInfo.getLength() == 0)
+    return Error::success();
+
+  BinaryStreamReader FISR(FileInfo);
+  if (auto EC = FISR.readObject(FileInfoHeader))
+    return EC;
+
+  // First is an array of `NumModules` module indices.  This does not seem to be
+  // used for anything meaningful, so we ignore it.
+  FixedStreamArray<support::ulittle16_t> ModuleIndices;
+  if (auto EC = FISR.readArray(ModuleIndices, FileInfoHeader->NumModules))
+    return EC;
+  if (auto EC = FISR.readArray(ModFileCountArray, FileInfoHeader->NumModules))
+    return EC;
+
+  // Compute the real number of source files.  We can't trust the value in
+  // `FileInfoHeader->NumSourceFiles` because it is a uint16, and the sum of all
+  // source file counts might be larger than a uint16.  So we compute the real
+  // count by summing up the individual counts.
+  uint32_t NumSourceFiles = 0;
+  for (auto Count : ModFileCountArray)
+    NumSourceFiles += Count;
+
+  // In the reference implementation, this array is where the pointer documented
+  // at the definition of ModuleInfoHeader::FileNameOffs points to.  Note that
+  // although the field in ModuleInfoHeader is ignored this array is not, as it
+  // is the authority on where each filename begins in the names buffer.
+  if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles))
+    return EC;
+
+  if (auto EC = FISR.readStreamRef(NamesBuffer))
+    return EC;
+
+  // Both substreams must describe the same number of modules.  A corrupt file
+  // can make them disagree, so report that instead of asserting.
+  auto CountMismatch = [] {
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "FileInfo substream count doesn't match DBI.");
+  };
+
+  auto DescriptorIter = Descriptors.begin();
+  uint32_t NextFileIndex = 0;
+  ModuleInitialFileIndex.resize(FileInfoHeader->NumModules);
+  ModuleDescriptorOffsets.resize(FileInfoHeader->NumModules);
+  for (size_t I = 0; I < FileInfoHeader->NumModules; ++I) {
+    if (DescriptorIter == Descriptors.end())
+      return CountMismatch();
+    ModuleInitialFileIndex[I] = NextFileIndex;
+    ModuleDescriptorOffsets[I] = DescriptorIter.offset();
+
+    NextFileIndex += ModFileCountArray[I];
+    ++DescriptorIter;
+  }
+
+  if (DescriptorIter != Descriptors.end())
+    return CountMismatch();
+  assert(NextFileIndex == NumSourceFiles);
+
+  return Error::success();
+}
+
+uint32_t DbiModuleList::getModuleCount() const {
+  // FileInfoHeader is only set once a non-empty FileInfo substream has been
+  // read; without one there are no modules to report.
+  if (!FileInfoHeader)
+    return 0;
+  return FileInfoHeader->NumModules;
+}
+
+// Total number of source file entries across all modules.
+uint32_t DbiModuleList::getSourceFileCount() const {
+  return FileNameOffsets.size();
+}
+
+// Number of source files contributed by module `Modi`.
+uint16_t DbiModuleList::getSourceFileCount(uint32_t Modi) const {
+  return ModFileCountArray[Modi];
+}
+
+// Returns the descriptor of module `Modi` via its precomputed stream offset.
+DbiModuleDescriptor DbiModuleList::getModuleDescriptor(uint32_t Modi) const {
+  assert(Modi < getModuleCount());
+  uint32_t Offset = ModuleDescriptorOffsets[Modi];
+  auto Iter = Descriptors.at(Offset);
+  assert(Iter != Descriptors.end());
+  return *Iter;
+}
+
+// Range over the source file names of module `Modi`; each name is read from
+// the names buffer when the iterator reaches it.
+iterator_range<DbiModuleSourceFilesIterator>
+DbiModuleList::source_files(uint32_t Modi) const {
+  return make_range<DbiModuleSourceFilesIterator>(
+      DbiModuleSourceFilesIterator(*this, Modi, 0),
+      DbiModuleSourceFilesIterator());
+}
+
+// Returns the name at position `Index` of the FileNameOffsets array, or an
+// index_out_of_bounds error.
+Expected<StringRef> DbiModuleList::getFileName(uint32_t Index) const {
+  BinaryStreamReader Names(NamesBuffer);
+  if (Index >= getSourceFileCount())
+    return make_error<RawError>(raw_error_code::index_out_of_bounds);
+
+  uint32_t FileOffset = FileNameOffsets[Index];
+  Names.setOffset(FileOffset);
+  StringRef Name;
+  if (auto EC = Names.readCString(Name))
+    return std::move(EC);
+  return Name;
+}
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
index db703809f7c9c..f7538c580ba45 100644
--- 
a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -107,11 +107,11 @@ Error DbiStream::reload() { return make_error(raw_error_code::corrupt_file, "DBI type server substream not aligned."); + BinaryStreamRef ModInfoSubstream; + BinaryStreamRef FileInfoSubstream; if (auto EC = Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize)) return EC; - if (auto EC = initializeModInfoArray()) - return EC; if (auto EC = Reader.readStreamRef(SecContrSubstream, Header->SecContrSubstreamSize)) @@ -129,14 +129,15 @@ Error DbiStream::reload() { DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t))) return EC; + if (auto EC = Modules.initialize(ModInfoSubstream, FileInfoSubstream)) + return EC; + if (auto EC = initializeSectionContributionData()) return EC; if (auto EC = initializeSectionHeadersData()) return EC; if (auto EC = initializeSectionMapData()) return EC; - if (auto EC = initializeFileInfo()) - return EC; if (auto EC = initializeFpoRecords()) return EC; @@ -215,7 +216,8 @@ FixedStreamArray DbiStream::getFpoRecords() { return FpoRecords; } -ArrayRef DbiStream::modules() const { return ModuleInfos; } +const DbiModuleList &DbiStream::modules() const { return Modules; } + FixedStreamArray DbiStream::getSectionMap() const { return SectionMap; } @@ -248,25 +250,6 @@ Error DbiStream::initializeSectionContributionData() { "Unsupported DBI Section Contribution version"); } -Error DbiStream::initializeModInfoArray() { - if (ModInfoSubstream.getLength() == 0) - return Error::success(); - - // Since each DbiModuleDescriptor in the stream is a variable length, we have - // to iterate - // them to know how many there actually are. 
- BinaryStreamReader Reader(ModInfoSubstream); - - VarStreamArray ModInfoArray; - if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength())) - return EC; - for (auto &Info : ModInfoArray) { - ModuleInfos.emplace_back(Info); - } - - return Error::success(); -} - // Initializes this->SectionHeaders. Error DbiStream::initializeSectionHeadersData() { if (DbgStreams.size() == 0) @@ -338,90 +321,9 @@ Error DbiStream::initializeSectionMapData() { return Error::success(); } -Error DbiStream::initializeFileInfo() { - if (FileInfoSubstream.getLength() == 0) - return Error::success(); - - const FileInfoSubstreamHeader *FH; - BinaryStreamReader FISR(FileInfoSubstream); - if (auto EC = FISR.readObject(FH)) - return EC; - - // The number of modules in the stream should be the same as reported by - // the FileInfoSubstreamHeader. - if (FH->NumModules != ModuleInfos.size()) - return make_error(raw_error_code::corrupt_file, - "FileInfo substream count doesn't match DBI."); - - FixedStreamArray ModIndexArray; - FixedStreamArray ModFileCountArray; - - // First is an array of `NumModules` module indices. This is not used for the - // same reason that `NumSourceFiles` is not used. It's an array of uint16's, - // but it's possible there are more than 64k source files, which would imply - // more than 64k modules (e.g. object files) as well. So we ignore this - // field. - if (auto EC = FISR.readArray(ModIndexArray, ModuleInfos.size())) - return EC; - if (auto EC = FISR.readArray(ModFileCountArray, ModuleInfos.size())) - return EC; - - // Compute the real number of source files. - uint32_t NumSourceFiles = 0; - for (auto Count : ModFileCountArray) - NumSourceFiles += Count; - - // This is the array that in the reference implementation corresponds to - // `DbiModuleDescriptor::FileLayout::FileNameOffs`, which is commented there - // as being a - // pointer. 
Due to the mentioned problems of pointers causing difficulty - // when reading from the file on 64-bit systems, we continue to ignore that - // field in `DbiModuleDescriptor`, and instead build a vector of StringRefs - // and stores - // them in `ModuleInfoEx`. The value written to and read from the file is - // not used anyway, it is only there as a way to store the offsets for the - // purposes of later accessing the names at runtime. - if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles)) - return EC; - - if (auto EC = FISR.readStreamRef(NamesBuffer)) - return EC; - - // We go through each ModuleInfo, determine the number N of source files for - // that module, and then get the next N offsets from the Offsets array, using - // them to get the corresponding N names from the Names buffer and associating - // each one with the corresponding module. - uint32_t NextFileIndex = 0; - for (size_t I = 0; I < ModuleInfos.size(); ++I) { - uint32_t NumFiles = ModFileCountArray[I]; - ModuleInfos[I].SourceFiles.resize(NumFiles); - for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { - auto ThisName = getFileNameForIndex(NextFileIndex); - if (!ThisName) - return ThisName.takeError(); - ModuleInfos[I].SourceFiles[J] = *ThisName; - } - } - - return Error::success(); -} - uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const { uint16_t T = static_cast(Type); if (T >= DbgStreams.size()) return kInvalidStreamIndex; return DbgStreams[T]; } - -Expected DbiStream::getFileNameForIndex(uint32_t Index) const { - BinaryStreamReader Names(NamesBuffer); - if (Index >= FileNameOffsets.size()) - return make_error(raw_error_code::index_out_of_bounds); - - uint32_t FileOffset = FileNameOffsets[Index]; - Names.setOffset(FileOffset); - StringRef Name; - if (auto EC = Names.readCString(Name)) - return std::move(EC); - return Name; -} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 
9c0cc0bf82337..77f832582f824 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -13,7 +13,7 @@ namespace llvm { namespace pdb { NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session, - const ModuleInfoEx &MI) + DbiModuleDescriptor MI) : NativeRawSymbol(Session), Module(MI) {} PDB_SymType NativeCompilandSymbol::getSymTag() const { @@ -21,7 +21,7 @@ PDB_SymType NativeCompilandSymbol::getSymTag() const { } bool NativeCompilandSymbol::isEditAndContinueEnabled() const { - return Module.Info.hasECInfo(); + return Module.hasECInfo(); } uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; } @@ -32,11 +32,11 @@ uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; } // this potential confusion. std::string NativeCompilandSymbol::getLibraryName() const { - return Module.Info.getObjFileName(); + return Module.getObjFileName(); } std::string NativeCompilandSymbol::getName() const { - return Module.Info.getModuleName(); + return Module.getModuleName(); } } // namespace pdb diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp index 7532110d005c9..97319fd77d117 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -19,25 +20,25 @@ namespace llvm { namespace pdb { NativeEnumModules::NativeEnumModules(NativeSession &PDBSession, - ArrayRef Modules, + const DbiModuleList &Modules, uint32_t Index) : Session(PDBSession), Modules(Modules), Index(Index) {} uint32_t 
NativeEnumModules::getChildCount() const { - return static_cast(Modules.size()); + return static_cast(Modules.getModuleCount()); } std::unique_ptr NativeEnumModules::getChildAtIndex(uint32_t Index) const { - if (Index >= Modules.size()) + if (Index >= Modules.getModuleCount()) return nullptr; - return std::unique_ptr(new PDBSymbolCompiland(Session, - std::unique_ptr( - new NativeCompilandSymbol(Session, Modules[Index])))); + return std::unique_ptr(new PDBSymbolCompiland( + Session, std::unique_ptr(new NativeCompilandSymbol( + Session, Modules.getModuleDescriptor(Index))))); } std::unique_ptr NativeEnumModules::getNext() { - if (Index >= Modules.size()) + if (Index >= Modules.getModuleCount()) return nullptr; return getChildAtIndex(Index++); } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index ec2a4b87457c1..bb52560be167a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -26,7 +26,7 @@ NativeExeSymbol::findChildren(PDB_SymType Type) const { case PDB_SymType::Compiland: { auto Dbi = File.getPDBDbiStream(); if (Dbi) { - const auto Modules = Dbi->modules(); + const DbiModuleList &Modules = Dbi->modules(); return std::unique_ptr( new NativeEnumModules(Session, Modules)); } diff --git a/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp index 06d8ced128a07..1af62b4e8df68 100644 --- a/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp +++ b/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp @@ -775,37 +775,37 @@ Error LLVMOutputStyle::dumpDbiStream() { if (DumpModules) { ListScope L(P, "Modules"); - for (auto &Modi : DS->modules()) { + const DbiModuleList &Modules = DS->modules(); + for (uint32_t I = 0; I < Modules.getModuleCount(); ++I) { + const DbiModuleDescriptor &Modi = Modules.getModuleDescriptor(I); DictScope DD(P); - P.printString("Name", Modi.Info.getModuleName().str()); - P.printNumber("Debug 
Stream Index", Modi.Info.getModuleStreamIndex()); - P.printString("Object File Name", Modi.Info.getObjFileName().str()); - P.printNumber("Num Files", Modi.Info.getNumberOfFiles()); - P.printNumber("Source File Name Idx", Modi.Info.getSourceFileNameIndex()); - P.printNumber("Pdb File Name Idx", Modi.Info.getPdbFilePathNameIndex()); - P.printNumber("Line Info Byte Size", Modi.Info.getC11LineInfoByteSize()); - P.printNumber("C13 Line Info Byte Size", - Modi.Info.getC13LineInfoByteSize()); - P.printNumber("Symbol Byte Size", Modi.Info.getSymbolDebugInfoByteSize()); - P.printNumber("Type Server Index", Modi.Info.getTypeServerIndex()); - P.printBoolean("Has EC Info", Modi.Info.hasECInfo()); + P.printString("Name", Modi.getModuleName().str()); + P.printNumber("Debug Stream Index", Modi.getModuleStreamIndex()); + P.printString("Object File Name", Modi.getObjFileName().str()); + P.printNumber("Num Files", Modi.getNumberOfFiles()); + P.printNumber("Source File Name Idx", Modi.getSourceFileNameIndex()); + P.printNumber("Pdb File Name Idx", Modi.getPdbFilePathNameIndex()); + P.printNumber("Line Info Byte Size", Modi.getC11LineInfoByteSize()); + P.printNumber("C13 Line Info Byte Size", Modi.getC13LineInfoByteSize()); + P.printNumber("Symbol Byte Size", Modi.getSymbolDebugInfoByteSize()); + P.printNumber("Type Server Index", Modi.getTypeServerIndex()); + P.printBoolean("Has EC Info", Modi.hasECInfo()); if (opts::raw::DumpModuleFiles) { - std::string FileListName = - to_string(Modi.SourceFiles.size()) + " Contributing Source Files"; + std::string FileListName = to_string(Modules.getSourceFileCount(I)) + + " Contributing Source Files"; ListScope LL(P, FileListName); - for (auto File : Modi.SourceFiles) - P.printString(File.str()); + for (auto File : Modules.source_files(I)) + P.printString(File); } - bool HasModuleDI = - (Modi.Info.getModuleStreamIndex() < File.getNumStreams()); + bool HasModuleDI = (Modi.getModuleStreamIndex() < File.getNumStreams()); bool ShouldDumpSymbols = 
(opts::raw::DumpModuleSyms || opts::raw::DumpSymRecordBytes); if (HasModuleDI && (ShouldDumpSymbols || opts::raw::DumpLineInfo)) { auto ModStreamData = MappedBlockStream::createIndexedStream( File.getMsfLayout(), File.getMsfBuffer(), - Modi.Info.getModuleStreamIndex()); + Modi.getModuleStreamIndex()); - ModuleDebugStreamRef ModS(Modi.Info, std::move(ModStreamData)); + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); if (auto EC = ModS.reload()) return EC; @@ -876,9 +876,10 @@ Error LLVMOutputStyle::dumpSectionContribs() { { DictScope DD(P, "Module"); P.printNumber("Index", SC.Imod); - auto M = DS.modules(); - if (M.size() > SC.Imod) { - P.printString("Name", M[SC.Imod].Info.getModuleName()); + const DbiModuleList &Modules = DS.modules(); + if (Modules.getModuleCount() > SC.Imod) { + P.printString("Name", + Modules.getModuleDescriptor(SC.Imod).getModuleName()); } } P.printNumber("Data CRC", SC.DataCrc); diff --git a/llvm/tools/llvm-pdbdump/StreamUtil.cpp b/llvm/tools/llvm-pdbdump/StreamUtil.cpp index 6577702adac88..81aa256b5002d 100644 --- a/llvm/tools/llvm-pdbdump/StreamUtil.cpp +++ b/llvm/tools/llvm-pdbdump/StreamUtil.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -30,14 +31,16 @@ void discoverStreamPurposes(PDBFile &File, auto Info = File.getPDBInfoStream(); uint32_t StreamCount = File.getNumStreams(); - DenseMap ModStreams; + DenseMap ModStreams; DenseMap NamedStreams; if (Dbi) { - for (auto &ModI : Dbi->modules()) { - uint16_t SN = ModI.Info.getModuleStreamIndex(); + const DbiModuleList &Modules = Dbi->modules(); + for (uint32_t I = 0; I < Modules.getModuleCount(); ++I) { + DbiModuleDescriptor Descriptor = Modules.getModuleDescriptor(I); + uint16_t SN = 
Descriptor.getModuleStreamIndex(); if (SN != kInvalidStreamIndex) - ModStreams[SN] = &ModI; + ModStreams[SN] = Descriptor; } } if (Info) { @@ -109,7 +112,7 @@ void discoverStreamPurposes(PDBFile &File, auto NSIter = NamedStreams.find(StreamIdx); if (ModIter != ModStreams.end()) { Value = "Module \""; - Value += ModIter->second->Info.getModuleName().str(); + Value += ModIter->second.getModuleName(); Value += "\""; } else if (NSIter != NamedStreams.end()) { Value = "Named Stream \""; diff --git a/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp index b94b5a4abf37c..0573b23cdc761 100644 --- a/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp +++ b/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp @@ -305,23 +305,28 @@ Error YAMLOutputStyle::dumpDbiStream() { Obj.DbiStream->PdbDllVersion = DS.getPdbDllVersion(); Obj.DbiStream->VerHeader = DS.getDbiVersion(); if (opts::pdb2yaml::DbiModuleInfo) { - for (const auto &MI : DS.modules()) { + const auto &Modules = DS.modules(); + for (uint32_t I = 0; I < Modules.getModuleCount(); ++I) { + DbiModuleDescriptor MI = Modules.getModuleDescriptor(I); + Obj.DbiStream->ModInfos.emplace_back(); yaml::PdbDbiModuleInfo &DMI = Obj.DbiStream->ModInfos.back(); - DMI.Mod = MI.Info.getModuleName(); - DMI.Obj = MI.Info.getObjFileName(); - if (opts::pdb2yaml::DbiModuleSourceFileInfo) - DMI.SourceFiles = MI.SourceFiles; + DMI.Mod = MI.getModuleName(); + DMI.Obj = MI.getObjFileName(); + if (opts::pdb2yaml::DbiModuleSourceFileInfo) { + auto Files = Modules.source_files(I); + DMI.SourceFiles.assign(Files.begin(), Files.end()); + } - uint16_t ModiStream = MI.Info.getModuleStreamIndex(); + uint16_t ModiStream = MI.getModuleStreamIndex(); if (ModiStream == kInvalidStreamIndex) continue; auto ModStreamData = msf::MappedBlockStream::createIndexedStream( File.getMsfLayout(), File.getMsfBuffer(), ModiStream); - pdb::ModuleDebugStreamRef ModS(MI.Info, std::move(ModStreamData)); + pdb::ModuleDebugStreamRef ModS(MI, 
std::move(ModStreamData)); if (auto EC = ModS.reload()) return EC;