Skip to content

Commit

Permalink
Reland: [Coverage] Revise format to reduce binary size
Browse files Browse the repository at this point in the history
Try again with an up-to-date version of D69471 (9931712 was a stale
revision).

---

Revise the coverage mapping format to reduce binary size by:

1. Naming function records and marking them `linkonce_odr`, and
2. Compressing filenames.

This shrinks the size of llc's coverage segment by 82% (334MB -> 62MB)
and speeds up end-to-end single-threaded report generation by 10%. For
reference the compressed name data in llc is 81MB (__llvm_prf_names).

Rationale for changes to the format:

- With the current format, most coverage function records are discarded.
  E.g., more than 97% of the records in llc are *duplicate* placeholders
  for functions visible-but-not-used in TUs. Placeholders *are* used to
  show under-covered functions, but duplicate placeholders waste space.

- We reached general consensus about giving (1) a try at the 2017 code
  coverage BoF [1]. The thinking was that using `linkonce_odr` to merge
  duplicates is simpler than alternatives like teaching build systems
  about a coverage-aware database/module/etc on the side.

- Revising the format is expensive due to the backwards compatibility
  requirement, so we might as well compress filenames while we're at it.
  This shrinks the encoded filenames in llc by 86% (12MB -> 1.6MB).

See CoverageMappingFormat.rst for the details on what exactly has
changed.

Fixes PR34533 [2], hopefully.

[1] http://lists.llvm.org/pipermail/llvm-dev/2017-October/118428.html
[2] https://bugs.llvm.org/show_bug.cgi?id=34533

Differential Revision: https://reviews.llvm.org/D69471
  • Loading branch information
vedantk committed Feb 29, 2020
1 parent 3388871 commit dd1ea9d
Show file tree
Hide file tree
Showing 24 changed files with 846 additions and 314 deletions.
134 changes: 79 additions & 55 deletions clang/lib/CodeGen/CoverageMappingGen.cpp
Expand Up @@ -13,6 +13,8 @@
#include "CoverageMappingGen.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
Expand All @@ -24,6 +26,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

// This selects the coverage mapping format defined when `InstrProfData.inc`
// is textually included.
#define COVMAP_V3

using namespace clang;
using namespace CodeGen;
using namespace llvm::coverage;
Expand Down Expand Up @@ -1272,12 +1278,6 @@ struct CounterCoverageMappingBuilder
}
};

std::string getCoverageSection(const CodeGenModule &CGM) {
return llvm::getInstrProfSectionName(
llvm::IPSK_covmap,
CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
}

std::string normalizeFilename(StringRef Filename) {
llvm::SmallString<256> Path(Filename);
llvm::sys::fs::make_absolute(Path);
Expand Down Expand Up @@ -1317,30 +1317,71 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
}
}

void CoverageMappingModuleGen::addFunctionMappingRecord(
llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
const std::string &CoverageMapping, bool IsUsed) {
/// Return the name of the instrumentation-profile section of kind \p SK, as
/// reported by llvm::getInstrProfSectionName() for the object format of the
/// target triple that \p CGM is generating code for.
static std::string getInstrProfSection(const CodeGenModule &CGM,
llvm::InstrProfSectKind SK) {
return llvm::getInstrProfSectionName(
SK, CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
}

void CoverageMappingModuleGen::emitFunctionMappingRecord(
const FunctionInfo &Info, uint64_t FilenamesRef) {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
if (!FunctionRecordTy) {

// Assign a name to the function record. This is used to merge duplicates.
std::string FuncRecordName = "__covrec_" + llvm::utohexstr(Info.NameHash);

// A dummy description for a function included-but-not-used in a TU can be
// replaced by a full description provided by a different TU. The two kinds
// of descriptions play distinct roles: therefore, assign them different
// names to prevent `linkonce_odr` merging.
if (Info.IsUsed)
FuncRecordName += "u";

// Create the function record type.
const uint64_t NameHash = Info.NameHash;
const uint64_t FuncHash = Info.FuncHash;
const std::string &CoverageMapping = Info.CoverageMapping;
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
llvm::Type *FunctionRecordTypes[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
FunctionRecordTy =
llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
/*isPacked=*/true);
}
llvm::Type *FunctionRecordTypes[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
auto *FunctionRecordTy =
llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
/*isPacked=*/true);

#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
// Create the function record constant.
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
llvm::Constant *FunctionRecordVals[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
FunctionRecords.push_back(llvm::ConstantStruct::get(
FunctionRecordTy, makeArrayRef(FunctionRecordVals)));
auto *FuncRecordConstant = llvm::ConstantStruct::get(
FunctionRecordTy, makeArrayRef(FunctionRecordVals));

// Create the function record global.
auto *FuncRecord = new llvm::GlobalVariable(
CGM.getModule(), FunctionRecordTy, /*isConstant=*/true,
llvm::GlobalValue::LinkOnceODRLinkage, FuncRecordConstant,
FuncRecordName);
FuncRecord->setVisibility(llvm::GlobalValue::HiddenVisibility);
FuncRecord->setSection(getInstrProfSection(CGM, llvm::IPSK_covfun));
FuncRecord->setAlignment(llvm::Align(8));
if (CGM.supportsCOMDAT())
FuncRecord->setComdat(CGM.getModule().getOrInsertComdat(FuncRecordName));

// Make sure the data doesn't get deleted.
CGM.addUsedGlobal(FuncRecord);
}

void CoverageMappingModuleGen::addFunctionMappingRecord(
llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
const std::string &CoverageMapping, bool IsUsed) {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed});

if (!IsUsed)
FunctionNames.push_back(
llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
CoverageMappings.push_back(CoverageMapping);

if (CGM.getCodeGenOpts().DumpCoverageMapping) {
// Dump the coverage mapping data for this function by decoding the
Expand Down Expand Up @@ -1385,37 +1426,22 @@ void CoverageMappingModuleGen::emit() {
FilenameRefs[I] = FilenameStrs[I];
}

std::string FilenamesAndCoverageMappings;
llvm::raw_string_ostream OS(FilenamesAndCoverageMappings);
CoverageFilenamesSectionWriter(FilenameRefs).write(OS);

// Stream the content of CoverageMappings to OS while keeping
// memory consumption under control.
size_t CoverageMappingSize = 0;
for (auto &S : CoverageMappings) {
CoverageMappingSize += S.size();
OS << S;
S.clear();
S.shrink_to_fit();
std::string Filenames;
{
llvm::raw_string_ostream OS(Filenames);
CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
}
CoverageMappings.clear();
CoverageMappings.shrink_to_fit();
auto *FilenamesVal =
llvm::ConstantDataArray::getString(Ctx, Filenames, false);
const int64_t FilenamesRef = llvm::IndexedInstrProf::ComputeHash(Filenames);

size_t FilenamesSize = OS.str().size() - CoverageMappingSize;
// Append extra zeroes if necessary to ensure that the size of the filenames
// and coverage mappings is a multiple of 8.
if (size_t Rem = OS.str().size() % 8) {
CoverageMappingSize += 8 - Rem;
OS.write_zeros(8 - Rem);
}
auto *FilenamesAndMappingsVal =
llvm::ConstantDataArray::getString(Ctx, OS.str(), false);

// Create the deferred function records array
auto RecordsTy =
llvm::ArrayType::get(FunctionRecordTy, FunctionRecords.size());
auto RecordsVal = llvm::ConstantArray::get(RecordsTy, FunctionRecords);
// Emit the function records.
for (const FunctionInfo &Info : FunctionRecords)
emitFunctionMappingRecord(Info, FilenamesRef);

const unsigned NRecords = 0;
const size_t FilenamesSize = Filenames.size();
const unsigned CoverageMappingSize = 0;
llvm::Type *CovDataHeaderTypes[] = {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
Expand All @@ -1430,18 +1456,16 @@ void CoverageMappingModuleGen::emit() {
CovDataHeaderTy, makeArrayRef(CovDataHeaderVals));

// Create the coverage data record
llvm::Type *CovDataTypes[] = {CovDataHeaderTy, RecordsTy,
FilenamesAndMappingsVal->getType()};
llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()};
auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes));
llvm::Constant *TUDataVals[] = {CovDataHeaderVal, RecordsVal,
FilenamesAndMappingsVal};
llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal};
auto CovDataVal =
llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals));
auto CovData = new llvm::GlobalVariable(
CGM.getModule(), CovDataTy, true, llvm::GlobalValue::InternalLinkage,
CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage,
CovDataVal, llvm::getCoverageMappingVarName());

CovData->setSection(getCoverageSection(CGM));
CovData->setSection(getInstrProfSection(CGM, llvm::IPSK_covmap));
CovData->setAlignment(llvm::Align(8));

// Make sure the data doesn't get deleted.
Expand Down
18 changes: 14 additions & 4 deletions clang/lib/CodeGen/CoverageMappingGen.h
Expand Up @@ -47,17 +47,27 @@ class CodeGenModule;
/// Organizes the cross-function state that is used while generating
/// code coverage mapping data.
class CoverageMappingModuleGen {
/// Information needed to emit a coverage record for a function. One entry is
/// queued by each addFunctionMappingRecord() call and later handed to
/// emitFunctionMappingRecord().
struct FunctionInfo {
/// Hash of the function's name, computed with
/// llvm::IndexedInstrProf::ComputeHash().
uint64_t NameHash;
/// Function hash as supplied to addFunctionMappingRecord().
uint64_t FuncHash;
/// Coverage mapping data for the function, as supplied to
/// addFunctionMappingRecord().
std::string CoverageMapping;
/// Whether the function is used; when set, the emitted record's name gets a
/// "u" suffix so it is not merged with records that lack the suffix.
bool IsUsed;
};

CodeGenModule &CGM;
CoverageSourceInfo &SourceInfo;
llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries;
std::vector<llvm::Constant *> FunctionRecords;
std::vector<llvm::Constant *> FunctionNames;
llvm::StructType *FunctionRecordTy;
std::vector<std::string> CoverageMappings;
std::vector<FunctionInfo> FunctionRecords;

/// Emit a function record.
void emitFunctionMappingRecord(const FunctionInfo &Info,
uint64_t FilenamesRef);

public:
CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
: CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {}
: CGM(CGM), SourceInfo(SourceInfo) {}

CoverageSourceInfo &getSourceInfo() const {
return SourceInfo;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CoverageMapping/abspath.cpp
@@ -1,12 +1,12 @@
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name abspath.cpp %S/Inputs/../abspath.cpp -o - | FileCheck -check-prefix=RMDOTS %s
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -mllvm -enable-name-compression=false -emit-llvm -main-file-name abspath.cpp %S/Inputs/../abspath.cpp -o - | FileCheck -check-prefix=RMDOTS %s

// RMDOTS: @__llvm_coverage_mapping = {{.*}}"\01
// RMDOTS-NOT: Inputs
// RMDOTS: "

// RUN: mkdir -p %t/test && cd %t/test
// RUN: echo "void f1() {}" > f1.c
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name abspath.cpp ../test/f1.c -o - | FileCheck -check-prefix=RELPATH %s
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -mllvm -enable-name-compression=false -emit-llvm -main-file-name abspath.cpp ../test/f1.c -o - | FileCheck -check-prefix=RELPATH %s

// RELPATH: @__llvm_coverage_mapping = {{.*}}"\01
// RELPATH: {{[/\\].*(/|\\\\)test(/|\\\\)f1}}.c
Expand Down
25 changes: 22 additions & 3 deletions clang/test/CoverageMapping/ir.c
@@ -1,12 +1,31 @@
// Check the data structures emitted by coverage mapping
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name ir.c %s -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name ir.c %s -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping -mllvm -enable-name-compression=false | FileCheck %s -check-prefixes=COMMON,DARWIN
// RUN: %clang_cc1 -triple x86_64--windows-msvc -main-file-name ir.c %s -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping -mllvm -enable-name-compression=false | FileCheck %s -check-prefixes=COMMON,WINDOWS

static inline void unused() {}

void foo(void) { }
void foo(void) {}

int main(void) {
foo();
return 0;
}

// CHECK: @__llvm_coverage_mapping = internal constant { { i32, i32, i32, i32 }, [2 x <{ i64, i32, i64 }>], [{{[0-9]+}} x i8] } { { i32, i32, i32, i32 } { i32 2, i32 {{[0-9]+}}, i32 {{[0-9]+}}, i32 {{[0-9]+}} }, [2 x <{ i64, i32, i64 }>] [<{{.*}}> <{{.*}}>, <{{.*}}> <{{.*}}>]
// Check the function records. Two of the record names should come in the 'used'
// flavor, and one should not.

// DARWIN: [[FuncRecord1:@__covrec_[0-9A-F]+u]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section "__LLVM_COV,__llvm_covfun", align 8
// DARWIN: [[FuncRecord2:@__covrec_[0-9A-F]+u]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section "__LLVM_COV,__llvm_covfun", align 8
// DARWIN: [[FuncRecord3:@__covrec_[0-9A-F]+]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section "__LLVM_COV,__llvm_covfun", align 8
// DARWIN: @__llvm_coverage_mapping = private constant { { i32, i32, i32, i32 }, [{{.*}} x i8] } { {{.*}} }, section "__LLVM_COV,__llvm_covmap", align 8

// WINDOWS: [[FuncRecord1:@__covrec_[0-9A-F]+u]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section ".lcovfun$M", comdat, align 8
// WINDOWS: [[FuncRecord2:@__covrec_[0-9A-F]+u]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section ".lcovfun$M", comdat, align 8
// WINDOWS: [[FuncRecord3:@__covrec_[0-9A-F]+]] = linkonce_odr hidden constant <{ i64, i32, i64, i64, [{{.*}} x i8] }> <{ {{.*}} }>, section ".lcovfun$M", comdat, align 8
// WINDOWS: @__llvm_coverage_mapping = private constant { { i32, i32, i32, i32 }, [{{.*}} x i8] } { {{.*}} }, section ".lcovmap$M", align 8

// COMMON: @llvm.used = appending global [{{.*}} x i8*]
// COMMON-SAME: [[FuncRecord1]]
// COMMON-SAME: [[FuncRecord2]]
// COMMON-SAME: [[FuncRecord3]]
// COMMON-SAME: @__llvm_coverage_mapping
7 changes: 5 additions & 2 deletions clang/test/Profile/def-assignop.cpp
Expand Up @@ -18,9 +18,12 @@ struct A {
// PGOGEN: {{.*}}add{{.*}}%pgocount, 1
// PGOGEN: store{{.*}}@__profc__ZN1AaSEOS_

// Check that coverage mapping includes 6 function records including the
// Check that coverage mapping includes 3 function records including the
// defaulted copy and move operators: A::operator=
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }, [3 x <{{.*}}>],
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }
B b;
};

Expand Down
10 changes: 7 additions & 3 deletions clang/test/Profile/def-ctors.cpp
Expand Up @@ -20,11 +20,15 @@ struct Derived : public Base {
// PGOGEN-DAG: {{.*}}add{{.*}}%pgocount, 1
// PGOGEN-DAG: store{{.*}}@__profc__ZN7DerivedC2Ev

// Check that coverage mapping has 6 function records including
// Check that coverage mapping has 5 function records including
// the defaulted Derived::Derived(const Derived), and Derived::Derived()
// methods.
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }, [5 x
// <{{.*}}>],
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }
};

Derived dd;
Expand Down
10 changes: 7 additions & 3 deletions clang/test/Profile/def-dtors.cpp
Expand Up @@ -16,10 +16,14 @@ struct Derived : public Base {
// PGOGEN: {{.*}}add{{.*}}%pgocount, 1
// PGOGEN: store{{.*}}@__profc__ZN7DerivedD2Ev

// Check that coverage mapping has 6 function records including
// Check that coverage mapping has 5 function records including
// the default destructor in the derived class.
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }, [5 x
// <{{.*}}>],
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: section "__llvm_covfun", comdat
// COVMAP: @__llvm_coverage_mapping = {{.*}} { { i32, i32, i32, i32 }
};

int main() {
Expand Down

0 comments on commit dd1ea9d

Please sign in to comment.