Skip to content

Commit

Permalink
[PDB] Print the most redundant type record indices with /summary
Browse files Browse the repository at this point in the history
Summary:
I used this information to motivate splitting up the Intrinsic::ID enum
(5d98695) and adding a key method to
clang::Sema (586f65d) which saved a
fair amount of object file size.

Example output for clang.pdb:

  Top 10 types responsible for the most TPI input:
         index     total bytes   count     size
        0x3890:      8,671,220 = 1,805 *  4,804
       0xE13BE:      5,634,720 =   252 * 22,360
       0x6874C:      5,181,600 =   408 * 12,700
        0x2A1F:      4,520,528 = 1,574 *  2,872
       0x64BFF:      4,024,020 =   469 *  8,580
        0x1123:      4,012,020 = 2,157 *  1,860
        0x6952:      3,753,792 =   912 *  4,116
        0xC16F:      3,630,888 =   633 *  5,736
        0x69DD:      3,601,160 =   985 *  3,656
        0x678D:      3,577,904 =   319 * 11,216

In this case, we can see that record 0x3890 is responsible for ~8MB of
total object file size for objects in clang.

The user can then use llvm-pdbutil to find out what the record is:

  $ llvm-pdbutil dump -types -type-index 0x3890 bin/clang.pdb
                       Types (TPI Stream)
  ============================================================
    Showing 1 records.
       0x3890 | LF_FIELDLIST [size = 4804]
                - LF_STMEMBER [name = `WORDTYPE_MAX`, type = 0x1001, attrs = public]
                - LF_MEMBER [name = `U`, Type = 0x37F0, offset = 0, attrs = private]
                - LF_MEMBER [name = `BitWidth`, Type = 0x0075 (unsigned), offset = 8, attrs = private]
                - LF_METHOD [name = `APInt`, # overloads = 8, overload list = 0x3805]
  ...

In this case, we can see that these are members of the APInt class,
which is emitted in 1805 object files.

The next largest type is ASTContext:

  $ llvm-pdbutil dump -types -type-index 0xE13BE bin/clang.pdb
      0xE13BE | LF_FIELDLIST [size = 22360]
                - LF_BCLASS
                  type = 0x653EA, offset = 0, attrs = public
                - LF_MEMBER [name = `Types`, Type = 0x653EB, offset = 8, attrs = private]
                - LF_MEMBER [name = `ExtQualNodes`, Type = 0x653EC, offset = 24, attrs = private]
                - LF_MEMBER [name = `ComplexTypes`, Type = 0x653ED, offset = 48, attrs = private]
                - LF_MEMBER [name = `PointerTypes`, Type = 0x653EE, offset = 72, attrs = private]
  ...

ASTContext only appears 252 times, but the list of members is long, and
must be repeated everywhere it is used.

This was the output before I split Intrinsic::ID:

  Top 10 types responsible for the most TPI input:
        0x686C:     69,823,920 = 1,070 * 65,256
        0x686D:     69,819,640 = 1,070 * 65,252
        0x686E:     69,819,640 = 1,070 * 65,252
        0x686B:     16,371,000 = 1,070 * 15,300
        ...

These records were all lists of intrinsic enums.

Reviewers: MaskRay, ruiu

Subscribers: mgrang, zturner, thakis, hans, akhuang, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71437
  • Loading branch information
rnk committed Jan 3, 2020
1 parent 3559831 commit 783db78
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 27 deletions.
91 changes: 90 additions & 1 deletion lld/COFF/PDB.cpp
Expand Up @@ -16,8 +16,8 @@
#include "TypeMerger.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Timer.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
Expand All @@ -30,6 +30,7 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
Expand All @@ -54,6 +55,7 @@
#include "llvm/Support/CRC.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
Expand Down Expand Up @@ -189,6 +191,11 @@ class PDBLinker {
uint64_t globalSymbols = 0;
uint64_t moduleSymbols = 0;
uint64_t publicSymbols = 0;

// Per-record input histograms, filled in only when config->showSummary is
// enabled. Element N counts how many times the merged TPI (respectively IPI)
// record whose TypeIndex::toArrayIndex() is N was seen in the input. They are
// reported by printStats().
SmallVector<uint32_t, 0> tpiCounts;
SmallVector<uint32_t, 0> ipiCounts;
};

class DebugSHandler {
Expand Down Expand Up @@ -415,6 +422,27 @@ PDBLinker::mergeDebugT(ObjFile *file, CVIndexMap *objectIndexMap) {
fatal("codeview::mergeTypeAndIdRecords failed: " +
toString(std::move(err)));
}

if (config->showSummary) {
// Count how many times we saw each type record in our input. This
// calculation requires a second pass over the type records to classify each
// record as a type record (counted in tpiCounts) or an ID record (counted in
// ipiCounts). This is slow, but this block only executes when statistics
// were requested via showSummary.
tpiCounts.resize(tMerger.getTypeTable().size());
ipiCounts.resize(tMerger.getIDTable().size());
// NOTE(review): this pairs the i-th record of `types` with tpiMap[i]. That
// assumes tpiMap holds exactly one entry per record of `types`, starting at
// slot 0 -- confirm this still holds if the map can be pre-populated before
// this object's own records are merged.
uint32_t srcIdx = 0;
for (CVType &ty : types) {
TypeIndex dstIdx = objectIndexMap->tpiMap[srcIdx++];
// Type merging may fail, so a complex source type may become the simple
// NotTranslated type, which cannot be used as an array index.
if (dstIdx.isSimple())
continue;
// Pick the histogram that matches the stream this record belongs to.
SmallVectorImpl<uint32_t> &counts =
isIdRecord(ty.kind()) ? ipiCounts : tpiCounts;
++counts[dstIdx.toArrayIndex()];
}
}

return *objectIndexMap;
}

Expand Down Expand Up @@ -482,6 +510,20 @@ Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *file) {
}
}

if (config->showSummary) {
  // Update the input histograms for this type server. Each destination index
  // stored in the source-to-destination maps stands for one record that was
  // seen in the input, so it bumps the matching bucket. Simple indices are
  // skipped; only non-simple ones are converted to array slots.
  tpiCounts.resize(tMerger.getTypeTable().size());
  ipiCounts.resize(tMerger.getIDTable().size());

  for (TypeIndex dstType : indexMap.tpiMap) {
    if (dstType.isSimple())
      continue;
    ++tpiCounts[dstType.toArrayIndex()];
  }

  for (TypeIndex dstId : indexMap.ipiMap) {
    if (dstId.isSimple())
      continue;
    ++ipiCounts[dstId.toArrayIndex()];
  }
}

return indexMap;
}

Expand Down Expand Up @@ -1334,6 +1376,53 @@ void PDBLinker::printStats() {
print(moduleSymbols, "Module symbol records");
print(publicSymbols, "Public symbol records");

auto printLargeInputTypeRecs = [&](StringRef name,
ArrayRef<uint32_t> recCounts,
TypeCollection &records) {
// Figure out which type indices were responsible for the most duplicate
// bytes in the input files. These should be frequently emitted LF_CLASS and
// LF_FIELDLIST records.
struct TypeSizeInfo {
uint32_t typeSize;
uint32_t dupCount;
TypeIndex typeIndex;
uint64_t totalInputSize() const { return uint64_t(dupCount) * typeSize; }
bool operator<(const TypeSizeInfo &rhs) const {
return totalInputSize() < rhs.totalInputSize();
}
};
SmallVector<TypeSizeInfo, 0> tsis;
for (auto e : enumerate(recCounts)) {
TypeIndex typeIndex = TypeIndex::fromArrayIndex(e.index());
uint32_t typeSize = records.getType(typeIndex).length();
uint32_t dupCount = e.value();
tsis.push_back({typeSize, dupCount, typeIndex});
}

if (!tsis.empty()) {
stream << "\nTop 10 types responsible for the most " << name
<< " input:\n";
stream << " index total bytes count size\n";
llvm::sort(tsis);
unsigned i = 0;
for (const auto &tsi : reverse(tsis)) {
stream << formatv(" {0,10:X}: {1,14:N} = {2,5:N} * {3,6:N}\n",
tsi.typeIndex.getIndex(), tsi.totalInputSize(),
tsi.dupCount, tsi.typeSize);
if (++i >= 10)
break;
}
stream
<< "Run llvm-pdbutil to print details about a particular record:\n";
stream << formatv("llvm-pdbutil dump -{0}s -{0}-index {1:X} {2}\n",
(name == "TPI" ? "type" : "id"),
tsis.back().typeIndex.getIndex(), config->pdbPath);
}
};

printLargeInputTypeRecs("TPI", tpiCounts, tMerger.getTypeTable());
printLargeInputTypeRecs("IPI", ipiCounts, tMerger.getIDTable());

message(buffer);
}

Expand Down
14 changes: 13 additions & 1 deletion lld/test/COFF/pdb-type-server-simple.test
Expand Up @@ -105,4 +105,16 @@ SUMMARY-NEXT: 25 Merged TPI records
SUMMARY-NEXT: 3 Output PDB strings
SUMMARY-NEXT: 4 Global symbol records
SUMMARY-NEXT: 14 Module symbol records
SUMMARY-NEXT: 2 Public symbol records
SUMMARY-NEXT: 2 Public symbol records

SUMMARY: Top 10 types responsible for the most TPI input:
SUMMARY-NEXT: index total bytes count size
SUMMARY-NEXT: 0x1006: 36 = 1 * 36
SUMMARY: Run llvm-pdbutil to print details about a particular record:
SUMMARY-NEXT: llvm-pdbutil dump -types -type-index 0x1006 t.pdb

SUMMARY: Top 10 types responsible for the most IPI input:
SUMMARY-NEXT: index total bytes count size
SUMMARY-NEXT: 0x1006: 256 = 1 * 256
SUMMARY: Run llvm-pdbutil to print details about a particular record:
SUMMARY-NEXT: llvm-pdbutil dump -ids -id-index 0x1006 t.pdb
Expand Up @@ -43,7 +43,7 @@ class GlobalTypeTableBuilder : public TypeCollection {
/// Contains a list of all records indexed by TypeIndex.toArrayIndex().
SmallVector<ArrayRef<uint8_t>, 2> SeenRecords;

/// Contains a list of all hash values inexed by TypeIndex.toArrayIndex().
/// Contains a list of all hash values indexed by TypeIndex.toArrayIndex().
SmallVector<GloballyHashedType, 2> SeenHashes;

public:
Expand Down
37 changes: 28 additions & 9 deletions llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
Expand Up @@ -12,16 +12,35 @@
#include "llvm/DebugInfo/CodeView/TypeRecord.h"

namespace llvm {
namespace codeview {
/// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
/// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
/// option.
bool isUdtForwardRef(CVType CVT);

/// Given a CVType which is assumed to be an LF_MODIFIER, return the
/// TypeIndex of the type that the LF_MODIFIER modifies.
TypeIndex getModifiedType(const CVType &CVT);
namespace codeview {

/// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
/// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
/// option.
bool isUdtForwardRef(CVType CVT);

/// Given a CVType which is assumed to be an LF_MODIFIER, return the
/// TypeIndex of the type that the LF_MODIFIER modifies.
TypeIndex getModifiedType(const CVType &CVT);

/// Tell whether a record of kind \p K belongs in the IPI stream of a PDB
/// rather than the TPI stream. Object files carry these record kinds mixed in
/// with ordinary type records inside the .debug$T section.
inline bool isIdRecord(TypeLeafKind K) {
  const bool BelongsInIpi = K == TypeLeafKind::LF_FUNC_ID ||
                            K == TypeLeafKind::LF_MFUNC_ID ||
                            K == TypeLeafKind::LF_STRING_ID ||
                            K == TypeLeafKind::LF_SUBSTR_LIST ||
                            K == TypeLeafKind::LF_BUILDINFO ||
                            K == TypeLeafKind::LF_UDT_SRC_LINE ||
                            K == TypeLeafKind::LF_UDT_MOD_SRC_LINE;
  return BelongsInIpi;
}

} // namespace codeview
} // namespace llvm

#endif
16 changes: 1 addition & 15 deletions llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
Expand Up @@ -15,6 +15,7 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/Support/Error.h"

using namespace llvm;
Expand Down Expand Up @@ -202,21 +203,6 @@ class TypeStreamMerger {

const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);

/// Returns true when \p K is one of the leaf kinds this file treats as an ID
/// record, and false for every other kind.
static bool isIdRecord(TypeLeafKind K) {
  static const TypeLeafKind IdKinds[] = {
      TypeLeafKind::LF_FUNC_ID,         TypeLeafKind::LF_MFUNC_ID,
      TypeLeafKind::LF_STRING_ID,       TypeLeafKind::LF_SUBSTR_LIST,
      TypeLeafKind::LF_BUILDINFO,       TypeLeafKind::LF_UDT_SRC_LINE,
      TypeLeafKind::LF_UDT_MOD_SRC_LINE,
  };
  for (TypeLeafKind IdKind : IdKinds)
    if (K == IdKind)
      return true;
  return false;
}

void TypeStreamMerger::addMapping(TypeIndex Idx) {
if (!IsSecondPass) {
assert(IndexMap.size() == slotForIndex(CurIndex) &&
Expand Down

0 comments on commit 783db78

Please sign in to comment.