Skip to content

Commit

Permalink
Add an index for Module Metadata record in the bitcode
Browse files Browse the repository at this point in the history
Summary:
This index records the position of each metadata record in
the bitcode, so that the reader will be able to lazy-load
on demand each individual record.

We also make sure that every abbrev is emitted upfront so
that the block can be skipped while reading.

I don't plan to commit this before having the reader
counterpart, but I figured this can be reviewed mostly
independently.

Reviewers: pcc, tejohnson

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D28083

llvm-svn: 290684
  • Loading branch information
joker-eph committed Dec 28, 2016
1 parent 26dada7 commit 32ca148
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 8 deletions.
7 changes: 6 additions & 1 deletion llvm/include/llvm/Bitcode/BitstreamWriter.h
Expand Up @@ -112,6 +112,11 @@ class BitstreamWriter {
&Out[ByteNo], NewWord, BitNo & 7);
}

/// Backpatch the 64-bit value \p Val at bit position \p BitNo.
///
/// The value is written as two 32-bit words through BackpatchWord: the low
/// half at \p BitNo, followed by the high half at \p BitNo + 32.
void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
  const uint32_t LowHalf = static_cast<uint32_t>(Val);
  const uint32_t HighHalf = static_cast<uint32_t>(Val >> 32);
  BackpatchWord(BitNo, LowHalf);
  BackpatchWord(BitNo + 32, HighHalf);
}

void Emit(uint32_t Val, unsigned NumBits) {
assert(NumBits && NumBits <= 32 && "Invalid value size!");
assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
Expand Down Expand Up @@ -279,7 +284,7 @@ class BitstreamWriter {
default: llvm_unreachable("Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
if (Op.getEncodingData())
Emit((unsigned)V, (unsigned)Op.getEncodingData());
Emit64(V, (unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
if (Op.getEncodingData())
Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Expand Up @@ -241,7 +241,7 @@ enum MetadataCodes {
METADATA_SUBPROGRAM = 21, // [distinct, ...]
METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator]
METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...]
METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...]
METADATA_GLOBAL_VAR = 27, // [distinct, ...]
Expand All @@ -254,7 +254,9 @@ enum MetadataCodes {
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]]
METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
METADATA_INDEX_OFFSET = 38, // [offset]
METADATA_INDEX = 39, // [bitpos]
};

// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
Expand Down
85 changes: 80 additions & 5 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Expand Up @@ -224,7 +224,9 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
SmallVectorImpl<uint64_t> &Record);
void writeMetadataRecords(ArrayRef<const Metadata *> MDs,
SmallVectorImpl<uint64_t> &Record);
SmallVectorImpl<uint64_t> &Record,
std::vector<unsigned> *MDAbbrevs = nullptr,
std::vector<uint64_t> *IndexPos = nullptr);
void writeModuleMetadata();
void writeFunctionMetadata(const Function &F);
void writeFunctionMetadataAttachment(const Function &F);
Expand Down Expand Up @@ -1854,8 +1856,16 @@ void ModuleBitcodeWriter::writeMetadataStrings(
Record.clear();
}

// Generates an enum whose values index the abbreviation vector used when
// writing metadata records. Each HANDLE_MDNODE_LEAF(CLASS) entry pulled in
// from llvm/IR/Metadata.def expands to one CLASS##AbbrevID enumerator, in the
// order the entries appear in that file. LastPlusOne follows the last
// generated enumerator and is used to size the abbreviation vector.
enum MetadataAbbrev : unsigned {
#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
#include "llvm/IR/Metadata.def"
LastPlusOne
};

void ModuleBitcodeWriter::writeMetadataRecords(
ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record) {
ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record,
std::vector<unsigned> *MDAbbrevs, std::vector<uint64_t> *IndexPos) {
if (MDs.empty())
return;

Expand All @@ -1864,6 +1874,8 @@ void ModuleBitcodeWriter::writeMetadataRecords(
#include "llvm/IR/Metadata.def"

for (const Metadata *MD : MDs) {
if (IndexPos)
IndexPos->push_back(Stream.GetCurrentBitNo());
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
assert(N->isResolved() && "Expected forward references to be resolved");

Expand All @@ -1872,7 +1884,11 @@ void ModuleBitcodeWriter::writeMetadataRecords(
llvm_unreachable("Invalid MDNode subclass");
#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
if (MDAbbrevs) \
write##CLASS(cast<CLASS>(N), Record, \
(*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \
else \
write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
continue;
#include "llvm/IR/Metadata.def"
}
Expand All @@ -1885,10 +1901,69 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
if (!VE.hasMDs() && M.named_metadata_empty())
return;

Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
SmallVector<uint64_t, 64> Record;

// Emit all abbrevs upfront, so that the reader can jump in the middle of the
// block and load any metadata.
std::vector<unsigned> MDAbbrevs;

MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
createGenericDINodeAbbrev();

BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64));
unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);

Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);

// Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
writeMetadataRecords(VE.getNonMDStrings(), Record);

// Write a placeholder value in for the offset of the metadata index,
// which is written after the records, so that it can include
// the offset of each entry. The placeholder offset will be
// updated after all records are emitted.
uint64_t Vals[] = {0};
Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev);

// Compute and save the bit offset to the current position, which will be
// patched when we emit the index later. We can simply subtract the 64-bit
// fixed size from the current bit number to get the location to backpatch.
uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo();

// This index will contain the bitpos for each individual record.
std::vector<uint64_t> IndexPos;
IndexPos.reserve(VE.getNonMDStrings().size());

// Write all the records
writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);

// Now that we have emitted all the records we will emit the index. But first
// backpatch the forward reference so that the reader can skip the records
// efficiently.
Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64,
Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos);

// Delta encode the index.
uint64_t PreviousValue = IndexOffsetRecordBitPos;
for (auto &Elt : IndexPos) {
auto EltDelta = Elt - PreviousValue;
PreviousValue = Elt;
Elt = EltDelta;
}
// Emit the index record.
Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev);
IndexPos.clear();

// Write the named metadata now.
writeNamedMetadata(Record);

auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) {
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll
Expand Up @@ -17,6 +17,10 @@
; CHECK-NEXT: <DISTINCT_NODE op0=1 op1=3 op2=2/>
!4 = distinct !{!1, !3, !2}

; Before the named records we emit the index containing the position of the
; previously emitted records
; CHECK-NEXT: <INDEX {{.*}} (offset match)

; Note: named metadata nodes cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/Bitcode/mdnodes-distinct-nodes-break-cycles.ll
Expand Up @@ -22,6 +22,10 @@
; CHECK-NEXT: <NODE op0=2/>
!3 = !{!2}

; Before the named records we emit the index containing the position of the
; previously emitted records
; CHECK-NEXT: <INDEX {{.*}} (offset match)

; Note: named metadata nodes cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/Bitcode/mdnodes-distinct-nodes-first.ll
Expand Up @@ -11,6 +11,10 @@
; CHECK-NEXT: <NODE op0=1/>
!2 = !{!1}

; Before the named records we emit the index containing the position of the
; previously emitted records
; CHECK-NEXT: <INDEX {{.*}} (offset match)

; Note: named metadata nodes cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/Bitcode/mdnodes-in-post-order.ll
Expand Up @@ -15,6 +15,9 @@
; CHECK-NEXT: 'leaf
; CHECK-NEXT: }

; Before the records we emit an offset to the index for the block
; CHECK-NEXT: <INDEX_OFFSET

; The leafs should come first (in either order).
; CHECK-NEXT: <NODE op0=1/>
; CHECK-NEXT: <NODE op0=2/>
Expand All @@ -27,6 +30,10 @@
; CHECK-NEXT: <NODE op0=3 op1=5 op2=4/>
!6 = !{!3, !5, !4}

; Before the named records we emit the index containing the position of the
; previously emitted records
; CHECK-NEXT: <INDEX {{.*}} (offset match)

; Note: named metadata nodes cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Bitcode/metadata-function-blocks.ll
Expand Up @@ -19,6 +19,9 @@
; Each node gets a new number. Bottom-up traversal of nodes.
!named = !{!6}

; Before the records we emit an offset to the index for the block
; CHECK-NEXT: <INDEX_OFFSET

; CHECK-NEXT: <NODE op0=1/>
!4 = !{!"named"}

Expand Down
21 changes: 21 additions & 0 deletions llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
Expand Up @@ -353,6 +353,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
STRINGIFY_CODE(METADATA, MODULE)
STRINGIFY_CODE(METADATA, INDEX_OFFSET)
STRINGIFY_CODE(METADATA, INDEX)
}
case bitc::METADATA_KIND_BLOCK_ID:
switch (CodeID) {
Expand Down Expand Up @@ -514,6 +516,9 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,

SmallVector<uint64_t, 64> Record;

// Keep the offset to the metadata index if seen.
uint64_t MetadataIndexOffset = 0;

// Read all the records for this block.
while (1) {
if (Stream.AtEndOfStream())
Expand Down Expand Up @@ -600,6 +605,22 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];

// If we found a metadata index, let's verify that we had an offset before
// and validate its forward reference offset was correct!
if (BlockID == bitc::METADATA_BLOCK_ID) {
if (Code == bitc::METADATA_INDEX_OFFSET) {
MetadataIndexOffset = Stream.GetCurrentBitNo() + Record[0];
}
if (Code == bitc::METADATA_INDEX) {
outs() << " (offset ";
if (MetadataIndexOffset == RecordStartBit)
outs() << "match)";
else
outs() << "mismatch: " << MetadataIndexOffset << " vs "
<< RecordStartBit << ")";
}
}

// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)
Expand Down

0 comments on commit 32ca148

Please sign in to comment.