35 changes: 35 additions & 0 deletions llvm/test/Bitcode/module_hash.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
; Check per module hash.
; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
; MOD1: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
; MOD2: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>

; Check that the hash matches in the combined index.

; First regenerate the modules with a summary
; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc
; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc

; Recover the hashes from the modules themselves.
; RUN: llvm-bcanalyzer -dump %t1 | grep '<HASH' > %t.hash
; RUN: llvm-bcanalyzer -dump %t2 | grep '<HASH' >> %t.hash

; Generate the combined index and gather the hashes there.
; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '<HASH ' >> %t.hash

; Validate the output now, the hahes in the individual modules and the combined index are in the same file.
; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED

; First capture the value of the hash for the two modules.
; COMBINED: <HASH op0=[[HASH1_1:[0-9]*]] op1=[[HASH1_2:[0-9]*]] op2=[[HASH1_3:[0-9]*]] op3=[[HASH1_4:[0-9]*]] op4=[[HASH1_5:[0-9]*]] (match)/>
; COMBINED: <HASH op0=[[HASH2_1:[0-9]*]] op1=[[HASH2_2:[0-9]*]] op2=[[HASH2_3:[0-9]*]] op3=[[HASH2_4:[0-9]*]] op4=[[HASH2_5:[0-9]*]] (match)/>

; Validate against the value extracted from the combined index
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH1_1]] op1=[[HASH1_2]] op2=[[HASH1_3]] op3=[[HASH1_4]] op4=[[HASH1_5]]/>
; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH2_1]] op1=[[HASH2_2]] op2=[[HASH2_3]] op3=[[HASH2_4]] op4=[[HASH2_5]]/>


; Need a function for the combined index to be populated.
define void @foo() {
ret void
}
5 changes: 4 additions & 1 deletion llvm/tools/llvm-as/llvm-as.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ static cl::opt<bool> EmitSummaryIndex("module-summary",
cl::desc("Emit module summary index"),
cl::init(false));

static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
cl::init(false));

static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);

Expand Down Expand Up @@ -82,7 +85,7 @@ static void WriteOutputFile(const Module *M) {

if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
EmitSummaryIndex);
EmitSummaryIndex, EmitModuleHash);

// Declare success.
Out->keep();
Expand Down
40 changes: 40 additions & 0 deletions llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Verifier.h"
Expand All @@ -38,8 +39,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cctype>
#include <map>
#include <system_error>
Expand Down Expand Up @@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
STRINGIFY_CODE(MODULE_CODE, HASH)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {
Expand Down Expand Up @@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default:
return nullptr;
STRINGIFY_CODE(MST_CODE, ENTRY)
STRINGIFY_CODE(MST_CODE, HASH)
}
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
switch (CodeID) {
Expand Down Expand Up @@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
if (Stream.EnterSubBlock(BlockID, &NumWords))
return Error("Malformed block record");

// Keep it for later, when we see a MODULE_HASH record
uint64_t BlockEntryPos = Stream.getCurrentByteNo();

const char *BlockName = nullptr;
if (DumpRecords) {
outs() << Indent << "<";
Expand Down Expand Up @@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
++BlockStats.NumRecords;

StringRef Blob;
unsigned CurrentRecordPos = Stream.getCurrentByteNo();
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);

// Increment the # occurrences of this code.
Expand Down Expand Up @@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];

// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)
outs() << " (invalid)";
else {
// Recompute the hash and compare it to the one in the bitcode
SHA1 Hasher;
StringRef Hash;
{
int BlockSize = CurrentRecordPos - BlockEntryPos;
auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
Hash = Hasher.result();
}
SmallString<20> RecordedHash;
RecordedHash.resize(20);
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
RecordedHash[Pos++] = (Val >> 24) & 0xFF;
RecordedHash[Pos++] = (Val >> 16) & 0xFF;
RecordedHash[Pos++] = (Val >> 8) & 0xFF;
RecordedHash[Pos++] = (Val >> 0) & 0xFF;
}
if (Hash == RecordedHash)
outs() << " (match)";
else
outs() << " (!mismatch!)";
}
}

outs() << "/>";

if (Abbv) {
Expand Down