301 changes: 236 additions & 65 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,10 @@ void BitcodeWriterBase::writeModuleVersion() {
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2});
}

/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;

/// Base class to manage the module bitcode writing, currently subclassed for
/// ModuleBitcodeWriter and ThinLinkBitcodeWriter.
class ModuleBitcodeWriterBase : public BitcodeWriterBase {
protected:
/// The Module to write to bitcode.
const Module &M;

Expand All @@ -111,22 +110,6 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Optional per-module index to write for ThinLTO.
const ModuleSummaryIndex *Index;

/// True if a module hash record should be written.
bool GenerateHash;

SHA1 Hasher;

/// If non-null, when GenerateHash is true, the resulting hash is written
/// into ModHash. When GenerateHash is false, that specified value
/// is used as the hash instead of computing from the generated bitcode.
/// Can be used to produce the same module hash for a minimized bitcode
/// used just for the thin link as in the regular full bitcode that will
/// be used in the backend.
ModuleHash *ModHash;

/// The start bit of the identification block.
uint64_t BitcodeStartBit;

/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
/// class to use in the VST and summary block records.
Expand All @@ -140,17 +123,14 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
uint64_t VSTOffsetPlaceholder = 0;

public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// Constructs a ModuleBitcodeWriterBase object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash,
ModuleHash *ModHash = nullptr)
: BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), ModHash(ModHash),
BitcodeStartBit(Stream.GetCurrentBitNo()) {
ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index)
: BitcodeWriterBase(Stream, StrtabBuilder), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
Expand All @@ -172,6 +152,70 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
assignValueId(CallEdge.first.getGUID());
}

protected:
void writePerModuleGlobalValueSummary();

private:
void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
GlobalValueSummary *Summary,
unsigned ValueID,
unsigned FSCallsAbbrev,
unsigned FSCallsProfileAbbrev,
const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev);

/// Allocate the next value id (by pre-incrementing GlobalValueId) and record
/// it for \p ValGUID, replacing any id previously stored for that GUID.
void assignValueId(GlobalValue::GUID ValGUID) {
  auto NextId = ++GlobalValueId;
  GUIDToValueIdMap[ValGUID] = NextId;
}
/// Return the value id recorded for \p ValGUID in GUIDToValueIdMap.
/// Asserts that the GUID has an entry in the map.
unsigned getValueId(GlobalValue::GUID ValGUID) {
  auto Found = GUIDToValueIdMap.find(ValGUID);
  // Any GUID queried here is expected to have received its id from an
  // earlier assignValueId call.
  assert(Found != GUIDToValueIdMap.end() &&
         "GUID does not have assigned value Id");
  return Found->second;
}
// Helper to get the valueId for the type of value recorded in VI: when VI
// carries a value the id comes from VE, otherwise it is looked up by GUID.
unsigned getValueId(ValueInfo VI) {
  if (VI.getValue())
    return VE.getValueID(VI.getValue());
  return getValueId(VI.getGUID());
}
/// Returns a mutable reference to the map from GUID to its assigned value id.
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
};

/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public ModuleBitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;

/// True if a module hash record should be written.
bool GenerateHash;

/// If non-null, when GenerateHash is true, the resulting hash is written
/// into ModHash.
ModuleHash *ModHash;

SHA1 Hasher;

/// The start bit of the identification block.
uint64_t BitcodeStartBit;

public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
///
/// \p M, \p StrtabBuilder, \p Stream, \p ShouldPreserveUseListOrder and
/// \p Index are forwarded unchanged to ModuleBitcodeWriterBase. \p Buffer is
/// held by reference, and \p GenerateHash and \p ModHash (which may be null)
/// are stored as given. BitcodeStartBit is set to the bit position \p Stream
/// reports at construction time.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash,
ModuleHash *ModHash = nullptr)
: ModuleBitcodeWriterBase(M, StrtabBuilder, Stream,
ShouldPreserveUseListOrder, Index),
Buffer(Buffer), GenerateHash(GenerateHash), ModHash(ModHash),
BitcodeStartBit(Stream.GetCurrentBitNo()) {}

/// Emit the current module to the bitstream.
void write();

Expand Down Expand Up @@ -287,37 +331,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
writeFunction(const Function &F,
DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
void writeBlockInfo();
void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
GlobalValueSummary *Summary,
unsigned ValueID,
unsigned FSCallsAbbrev,
unsigned FSCallsProfileAbbrev,
const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev);
void writePerModuleGlobalValueSummary();
void writeModuleHash(size_t BlockStartPos);

void assignValueId(GlobalValue::GUID ValGUID) {
GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
}
unsigned getValueId(GlobalValue::GUID ValGUID) {
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
// Expect that any GUID value had a value Id assigned by an
// earlier call to assignValueId.
assert(VMI != GUIDToValueIdMap.end() &&
"GUID does not have assigned value Id");
return VMI->second;
}
// Helper to get the valueId for the type of value recorded in VI.
unsigned getValueId(ValueInfo VI) {
if (!VI.getValue())
return getValueId(VI.getGUID());
return VE.getValueID(VI.getValue());
}
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }

/// Returns \p SSID converted to unsigned.
unsigned getEncodedSyncScopeID(SyncScope::ID SSID) {
  return static_cast<unsigned>(SSID);
}
Expand Down Expand Up @@ -3267,7 +3282,7 @@ static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
}

// Helper to emit a single function summary record.
void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
const Function &F) {
Expand Down Expand Up @@ -3301,7 +3316,7 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(

// Collect the global value references in the given variable's initializer,
// and emit them in a summary record.
void ModuleBitcodeWriter::writeModuleLevelReferences(
void ModuleBitcodeWriterBase::writeModuleLevelReferences(
const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev) {
auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName()));
Expand Down Expand Up @@ -3335,7 +3350,7 @@ static const uint64_t INDEX_VERSION = 3;

/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
// By default we compile with ThinLTO if the module has a summary, but the
// client can request full LTO with a module flag.
bool IsThinLTO = true;
Expand Down Expand Up @@ -3689,8 +3704,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
if (ModHash)
// Save the written hash value.
std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));
} else if (ModHash)
Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef<uint32_t>(*ModHash));
}
}

void ModuleBitcodeWriter::write() {
Expand Down Expand Up @@ -3985,3 +3999,160 @@ void llvm::WriteIndexToFile(

Out.write((char *)&Buffer.front(), Buffer.size());
}

/// Class to manage the bitcode writing for a thin link bitcode file.
class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase {
/// ModHash is for use in ThinLTO incremental build, generated while writing
/// the module bitcode file.
const ModuleHash *ModHash;

public:
ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash)
: ModuleBitcodeWriterBase(M, StrtabBuilder, Stream,
/*ShouldPreserveUseListOrder=*/false, &Index),
ModHash(&ModHash) {}

void write();

private:
void writeSimplifiedModuleInfo();
};

// This function writes a simpilified module info for thin link bitcode file.
// It only contains the source file name along with the name(the offset and
// size in strtab) and linkage for global values. For the global value info
// entry, in order to keep linkage at offset 5, there are three zeros used
// as padding.
void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() {
SmallVector<unsigned, 64> Vals;
// Emit the module's source file name.
{
StringEncoding Bits = getStringEncoding(M.getSourceFileName());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
else if (Bits == SE_Fixed7)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);

// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));

for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);

Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
Vals.clear();
}

// Emit the global variable information.
for (const GlobalVariable &GV : M.globals()) {
// GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage]
Vals.push_back(StrtabBuilder.add(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(getEncodedLinkage(GV));

Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals);
Vals.clear();
}

// Emit the function proto information.
for (const Function &F : M) {
// FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage]
Vals.push_back(StrtabBuilder.add(F.getName()));
Vals.push_back(F.getName().size());
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(getEncodedLinkage(F));

Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals);
Vals.clear();
}

// Emit the alias information.
for (const GlobalAlias &A : M.aliases()) {
// ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage]
Vals.push_back(StrtabBuilder.add(A.getName()));
Vals.push_back(A.getName().size());
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(getEncodedLinkage(A));

Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals);
Vals.clear();
}

// Emit the ifunc information.
for (const GlobalIFunc &I : M.ifuncs()) {
// IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage]
Vals.push_back(StrtabBuilder.add(I.getName()));
Vals.push_back(I.getName().size());
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(0);
Vals.push_back(getEncodedLinkage(I));

Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
Vals.clear();
}
}

/// Emit the thin link bitcode for the module: one MODULE_BLOCK containing, in
/// order, the module version record, the simplified module info, the
/// per-module global value summary and the module hash record.
void ThinLinkBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);

writeModuleVersion();

writeSimplifiedModuleInfo();

writePerModuleGlobalValueSummary();

// Write module hash.
// The hash is not computed here; the value supplied at construction is
// emitted as-is.
Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef<uint32_t>(*ModHash));

Stream.ExitBlock();
}

void BitcodeWriter::writeThinLinkBitcode(const Module *M,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash) {
assert(!WroteStrtab);

// The Mods vector is used by irsymtab::build, which requires non-const
// Modules in case it needs to materialize metadata. But the bitcode writer
// requires that the module is materialized, so we can cast to non-const here,
// after checking that it is in fact materialized.
assert(M->isMaterialized());
Mods.push_back(const_cast<Module *>(M));

ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index,
ModHash);
ThinLinkWriter.write();
}

// Write the specified thin link bitcode file to the given raw output stream,
// where it will be written in a new bitcode block. This is used when
// writing the per-module index file for ThinLTO.
//
// The bitcode, symbol table and string table are first assembled in a local
// buffer, which is then copied to \p Out in a single write.
void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out,
                                      const ModuleSummaryIndex &Index,
                                      const ModuleHash &ModHash) {
  SmallVector<char, 0> Buffer;
  Buffer.reserve(256 * 1024);

  {
    BitcodeWriter Writer(Buffer);
    Writer.writeThinLinkBitcode(M, Index, ModHash);
    Writer.writeSymtab();
    Writer.writeStrtab();

    // Buffer.front() is already a char, so its address needs no cast.
    Out.write(&Buffer.front(), Buffer.size());
  }
}
22 changes: 9 additions & 13 deletions llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,15 +378,14 @@ void splitAndWriteThinLTOBitcode(
W.writeStrtab();
OS << Buffer;

// If a minimized bitcode module was requested for the thin link,
// strip the debug info (the merged module was already stripped above)
// and write it to the given OS.
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link is written to the
// given OS (the merged module is written as usual).
if (ThinLinkOS) {
Buffer.clear();
BitcodeWriter W2(Buffer);
StripDebugInfo(M);
W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/false, &ModHash);
W2.writeThinLinkBitcode(&M, Index, ModHash);
W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
W2.writeSymtab();
Expand Down Expand Up @@ -422,14 +421,11 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
ModuleHash ModHash = {{0}};
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
/*GenerateHash=*/true, &ModHash);
// If a minimized bitcode module was requested for the thin link,
// strip the debug info and write it to the given OS.
if (ThinLinkOS) {
StripDebugInfo(M);
WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
Index,
/*GenerateHash=*/false, &ModHash);
}
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link is written to the
// given OS.
if (ThinLinkOS && Index)
WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash);
}

class WriteThinLTOBitcode : public ModulePass {
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/ThinLTO/X86/distributed_import.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
; the debug metadata for the thin link.
; RUN: opt -thinlto-bc %s -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc
; RUN: opt -thinlto-bc %p/Inputs/distributed_import.ll -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc
; RUN: llvm-bcanalyzer -dump %t1.thinlink.bc | FileCheck --check-prefix=THINLINKBITCODE %s

; First perform the thin link on the normal bitcode file.
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps \
Expand Down Expand Up @@ -56,6 +57,25 @@ entry:
ret void
}

; THINLINKBITCODE-NOT: IDENTIFICATION_BLOCK_ID
; THINLINKBITCODE-NOT: BLOCKINFO_BLOCK
; THINLINKBITCODE-NOT: TYPE_BLOCK_ID
; THINLINKBITCODE-NOT: VSTOFFSET
; THINLINKBITCODE-NOT: CONSTANTS_BLOCK
; THINLINKBITCODE-NOT: METADATA_KIND_BLOCK
; THINLINKBITCODE-NOT: METADATA_BLOCK
; THINLINKBITCODE-NOT: OPERAND_BUNDLE_TAGS_BLOCK
; THINLINKBITCODE-NOT: UnknownBlock26
; THINLINKBITCODE-NOT: FUNCTION_BLOCK
; THINLINKBITCODE-NOT: VALUE_SYMTAB
; THINLINKBITCODE: MODULE_BLOCK
; THINLINKBITCODE: VERSION
; THINLINKBITCODE: SOURCE_FILENAME
; THINLINKBITCODE: GLOBALVAL_SUMMARY_BLOCK
; THINLINKBITCODE: HASH
; THINLINKBITCODE: SYMTAB_BLOCK
; THINLINKBITCODE: STRTAB_BLOCK

!llvm.dbg.cu = !{}

!1 = !{i32 2, !"Debug Info Version", i32 3}
Expand Down
8 changes: 0 additions & 8 deletions llvm/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
; the debug metadata for the thin link.
; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t.thinlink.bc -o %t.bc %s
; RUN: llvm-dis -o - %t.bc | FileCheck %s
; RUN: llvm-dis -o - %t.thinlink.bc | FileCheck --check-prefix=NODEBUG %s
; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck --check-prefix=BCA %s

; Make sure the combined index files produced by both the normal and the
Expand Down Expand Up @@ -35,10 +34,3 @@
define void @f() {
ret void
}

; CHECK: !llvm.dbg.cu
; NODEBUG-NOT: !llvm.dbg.cu
!llvm.dbg.cu = !{}

!1 = !{i32 2, !"Debug Info Version", i32 3}
!llvm.module.flags = !{!1}
11 changes: 0 additions & 11 deletions llvm/test/Transforms/ThinLTOBitcodeWriter/split.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
; RUN: not llvm-modextract -b -n 2 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
; RUN: llvm-dis -o - %t0.bc | FileCheck --check-prefix=M0 %s
; RUN: llvm-dis -o - %t1.bc | FileCheck --check-prefix=M1 %s
; RUN: llvm-dis -o - %t0.thinlink.bc | FileCheck --check-prefix=NODEBUG %s
; RUN: llvm-dis -o - %t1.thinlink.bc | FileCheck --check-prefix=NODEBUG %s
; RUN: llvm-bcanalyzer -dump %t0.bc | FileCheck --check-prefix=BCA0 %s
; RUN: llvm-bcanalyzer -dump %t1.bc | FileCheck --check-prefix=BCA1 %s

Expand Down Expand Up @@ -44,12 +42,3 @@ define i8* @f() {

; M1: !0 = !{i32 0, !"typeid"}
!0 = !{i32 0, !"typeid"}

; M0: !llvm.dbg.cu
; M1-NOT: !llvm.dbg.cu
; NODEBUG-NOT: !llvm.dbg.cu
!llvm.dbg.cu = !{}

; M1: !{i32 1, !"ThinLTO", i32 0}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!llvm.module.flags = !{!1}