From 32ca14819848b362af9e1879f3811e68427f5279 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 28 Dec 2016 19:44:19 +0000 Subject: [PATCH] Add an index for Module Metadata record in the bitcode Summary: This index records the position for each metadata record in the bitcode, so that the reader will be able to lazy-load on demand each individual record. We also make sure that every abbrev is emitted upfront so that the block can be skipped while reading. I don't plan to commit this before having the reader counterpart, but I figured this can be reviewed mostly independently. Reviewers: pcc, tejohnson Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28083 llvm-svn: 290684 --- llvm/include/llvm/Bitcode/BitstreamWriter.h | 7 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 6 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 85 +++++++++++++++++-- .../Bitcode/mdnodes-distinct-in-post-order.ll | 4 + .../mdnodes-distinct-nodes-break-cycles.ll | 4 + .../Bitcode/mdnodes-distinct-nodes-first.ll | 4 + llvm/test/Bitcode/mdnodes-in-post-order.ll | 7 ++ llvm/test/Bitcode/metadata-function-blocks.ll | 3 + .../tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 21 +++++ 9 files changed, 133 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Bitcode/BitstreamWriter.h b/llvm/include/llvm/Bitcode/BitstreamWriter.h index b79e88d2c0580..06113869ad6d8 100644 --- a/llvm/include/llvm/Bitcode/BitstreamWriter.h +++ b/llvm/include/llvm/Bitcode/BitstreamWriter.h @@ -112,6 +112,11 @@ class BitstreamWriter { &Out[ByteNo], NewWord, BitNo & 7); } + void BackpatchWord64(uint64_t BitNo, uint64_t Val) { + BackpatchWord(BitNo, (uint32_t)Val); + BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32)); + } + void Emit(uint32_t Val, unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Invalid value size!"); assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); @@ -279,7 +284,7 @@ class BitstreamWriter { default: llvm_unreachable("Unknown encoding!"); case 
BitCodeAbbrevOp::Fixed: if (Op.getEncodingData()) - Emit((unsigned)V, (unsigned)Op.getEncodingData()); + Emit64(V, (unsigned)Op.getEncodingData()); break; case BitCodeAbbrevOp::VBR: if (Op.getEncodingData()) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index f42e75427b361..8b8cc7a8b761c 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -241,7 +241,7 @@ enum MetadataCodes { METADATA_SUBPROGRAM = 21, // [distinct, ...] METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] - METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] + METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] METADATA_GLOBAL_VAR = 27, // [distinct, ...] @@ -254,7 +254,9 @@ enum MetadataCodes { METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] 
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] - METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] + METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] + METADATA_INDEX_OFFSET = 38, // [offset] + METADATA_INDEX = 39, // [bitpos] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 9d2bea8ccaeff..a8117b192eb89 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -224,7 +224,9 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { void writeMetadataStrings(ArrayRef Strings, SmallVectorImpl &Record); void writeMetadataRecords(ArrayRef MDs, - SmallVectorImpl &Record); + SmallVectorImpl &Record, + std::vector *MDAbbrevs = nullptr, + std::vector *IndexPos = nullptr); void writeModuleMetadata(); void writeFunctionMetadata(const Function &F); void writeFunctionMetadataAttachment(const Function &F); @@ -1854,8 +1856,16 @@ void ModuleBitcodeWriter::writeMetadataStrings( Record.clear(); } +// Generates an enum to use as an index in the Abbrev array of Metadata record. 
+enum MetadataAbbrev : unsigned { +#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID, +#include "llvm/IR/Metadata.def" + LastPlusOne +}; + void ModuleBitcodeWriter::writeMetadataRecords( - ArrayRef MDs, SmallVectorImpl &Record) { + ArrayRef MDs, SmallVectorImpl &Record, + std::vector *MDAbbrevs, std::vector *IndexPos) { if (MDs.empty()) return; @@ -1864,6 +1874,8 @@ void ModuleBitcodeWriter::writeMetadataRecords( #include "llvm/IR/Metadata.def" for (const Metadata *MD : MDs) { + if (IndexPos) + IndexPos->push_back(Stream.GetCurrentBitNo()); if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); @@ -1872,7 +1884,11 @@ void ModuleBitcodeWriter::writeMetadataRecords( llvm_unreachable("Invalid MDNode subclass"); #define HANDLE_MDNODE_LEAF(CLASS) \ case Metadata::CLASS##Kind: \ - write##CLASS(cast(N), Record, CLASS##Abbrev); \ + if (MDAbbrevs) \ + write##CLASS(cast(N), Record, \ + (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \ + else \ + write##CLASS(cast(N), Record, CLASS##Abbrev); \ continue; #include "llvm/IR/Metadata.def" } @@ -1885,10 +1901,69 @@ void ModuleBitcodeWriter::writeModuleMetadata() { if (!VE.hasMDs() && M.named_metadata_empty()) return; - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4); SmallVector Record; + + // Emit all abbrevs upfront, so that the reader can jump in the middle of the + // block and load any metadata. 
+ std::vector MDAbbrevs; + + MDAbbrevs.resize(MetadataAbbrev::LastPlusOne); + MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev(); + MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] = + createGenericDINodeAbbrev(); + + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64)); + unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv); + + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv); + + // Emit MDStrings together upfront. writeMetadataStrings(VE.getMDStrings(), Record); - writeMetadataRecords(VE.getNonMDStrings(), Record); + + // Write a placeholder value in for the offset of the metadata index, + // which is written after the records, so that it can include + // the offset of each entry. The placeholder offset will be + // updated after all records are emitted. + uint64_t Vals[] = {0}; + Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev); + + // Compute and save the bit offset to the current position, which will be + // patched when we emit the index later. We can simply subtract the 64-bit + // fixed size from the current bit number to get the location to backpatch. + uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo(); + + // This index will contain the bitpos for each individual record. + std::vector IndexPos; + IndexPos.reserve(VE.getNonMDStrings().size()); + + // Write all the records + writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos); + + // Now that we have emitted all the records we will emit the index. But first + // backpatch the forward reference so that the reader can skip the records + // efficiently. 
+ Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64, + Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos); + + // Delta encode the index. + uint64_t PreviousValue = IndexOffsetRecordBitPos; + for (auto &Elt : IndexPos) { + auto EltDelta = Elt - PreviousValue; + PreviousValue = Elt; + Elt = EltDelta; + } + // Emit the index record. + Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev); + IndexPos.clear(); + + // Write the named metadata now. writeNamedMetadata(Record); auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { diff --git a/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll b/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll index 6e6ba604235b9..45ec48d9ab653 100644 --- a/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll +++ b/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll @@ -17,6 +17,10 @@ ; CHECK-NEXT: !4 = distinct !{!1, !3, !2} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: !3 = !{!2} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: !2 = !{!1} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: ; CHECK-NEXT: @@ -27,6 +30,10 @@ ; CHECK-NEXT: !6 = !{!3, !5, !4} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: !4 = !{!"named"} diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index e494343bf8799..3220063275b09 100644 --- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -353,6 +353,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(METADATA, OBJC_PROPERTY) STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) STRINGIFY_CODE(METADATA, MODULE) + STRINGIFY_CODE(METADATA, INDEX_OFFSET) + 
STRINGIFY_CODE(METADATA, INDEX) } case bitc::METADATA_KIND_BLOCK_ID: switch (CodeID) { @@ -514,6 +516,9 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, SmallVector Record; + // Keep the offset to the metadata index if seen. + uint64_t MetadataIndexOffset = 0; + // Read all the records for this block. while (1) { if (Stream.AtEndOfStream()) @@ -600,6 +605,22 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, for (unsigned i = 0, e = Record.size(); i != e; ++i) outs() << " op" << i << "=" << (int64_t)Record[i]; + // If we found a metadata index, let's verify that we had an offset before + // and validate its forward reference offset was correct! + if (BlockID == bitc::METADATA_BLOCK_ID) { + if (Code == bitc::METADATA_INDEX_OFFSET) { + MetadataIndexOffset = Stream.GetCurrentBitNo() + Record[0]; + } + if (Code == bitc::METADATA_INDEX) { + outs() << " (offset "; + if (MetadataIndexOffset == RecordStartBit) + outs() << "match)"; + else + outs() << "mismatch: " << MetadataIndexOffset << " vs " + << RecordStartBit << ")"; + } + } + // If we found a module hash, let's verify that it matches! if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) { if (Record.size() != 5)