Skip to content

Commit

Permalink
Merge pull request #1653 from JohanEngelen/pgo-icp
Browse files Browse the repository at this point in the history
[PGO] Add Indirect Call instrumentation and promotion.
  • Loading branch information
redstar committed Jul 30, 2016
2 parents 5451186 + de86a0a commit be63398
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 8 deletions.
89 changes: 82 additions & 7 deletions gen/pgo.cpp
Expand Up @@ -21,6 +21,7 @@
#include "init.h"
#include "statement.h"
#include "llvm.h"
#include "gen/cl_helpers.h"
#include "gen/irstate.h"
#include "gen/logger.h"
#include "gen/recursivevisitor.h"
Expand All @@ -33,6 +34,15 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"

#if LDC_LLVM_VER >= 309
namespace {
/// Boolean command-line option type parsed through opts::FlagParser.
using PGOFlagOpt = llvm::cl::opt<bool, false, opts::FlagParser<bool>>;

/// Command-line switch `pgo-indirect-calls`.
///
/// Controls whether CodeGenPGO::emitIndirectCallPGO() profiles indirect call
/// targets. The option is registered as hidden and is initialized to `true`.
/// It only exists when building against LLVM >= 3.9.
PGOFlagOpt enablePGOIndirectCalls(
    "pgo-indirect-calls",
    llvm::cl::desc("(*) Enable PGO of indirect calls (LLVM >= 3.9)"),
    llvm::cl::init(true), llvm::cl::Hidden);
} // anonymous namespace
#endif // LDC_LLVM_VER >= 309

/// \brief Stable hasher for PGO region counters.
///
/// PGOHash produces a stable hash of a given function's control flow.
Expand Down Expand Up @@ -890,12 +900,20 @@ void CodeGenPGO::emitCounterIncrement(const RootObject *S) const {
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
const FuncDeclaration *fd) {
RegionCounts.clear();
if (auto E =
PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {

#if LDC_LLVM_VER >= 309
llvm::Expected<llvm::InstrProfRecord> RecordExpected =
PGOReader->getInstrProfRecord(FuncName, FunctionHash);
auto EC = RecordExpected.takeError();
#else
auto EC = PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts);
#endif

if (EC) {
#if LDC_LLVM_VER >= 309
auto IPE = llvm::InstrProfError::take(std::move(E));
auto IPE = llvm::InstrProfError::take(std::move(EC));
#else
auto IPE = E;
auto IPE = EC;
#endif
if (IPE == llvm::instrprof_error::unknown_function) {
IF_LOG Logger::println("No profile data for function: %s",
Expand Down Expand Up @@ -925,10 +943,17 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
FuncName.c_str());
}
RegionCounts.clear();
} else {
IF_LOG Logger::println("Loaded profile counts for function: %s",
FuncName.c_str());
return;
}

#if LDC_LLVM_VER >= 309
ProfRecord =
llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
RegionCounts = ProfRecord->Counts;
#endif

IF_LOG Logger::println("Loaded profile data for function: %s",
FuncName.c_str());
}

/// \brief Calculate what to divide by to scale weights.
Expand Down Expand Up @@ -1054,4 +1079,54 @@ llvm::MDNode *CodeGenPGO::createProfileWeightsForeachRange(
std::max(CondCount, LoopCount) - LoopCount);
}

/// Profiles the target of an indirect call.
///
/// Forwards `funcPtr` to valueProfile() as a value of kind
/// `llvm::IPVK_IndirectCallTarget` located at `callSite`, asking for the
/// pointer-to-integer cast. Nothing is emitted when the `pgo-indirect-calls`
/// switch is off; for LLVM < 3.9 the function body is empty.
void CodeGenPGO::emitIndirectCallPGO(llvm::Instruction *callSite,
                                     llvm::Value *funcPtr) {
#if LDC_LLVM_VER >= 309
  if (!enablePGOIndirectCalls)
    return;

  // The profiled value is a function pointer, so it has to be cast to i64.
  const bool needsPtrToInt = true;
  valueProfile(llvm::IPVK_IndirectCallTarget, callSite, funcPtr,
               needsPtrToInt);
#endif
}

/// Instruments or annotates a single profiled value site.
///
/// - While generating instrumentation (`global.params.genInstrProf`,
///   `emitInstrumentation` and a region counter map are all set), a call to
///   the `instrprof_value_profile` intrinsic is inserted directly before
///   `valueSite`; `value` is first cast with ptrtoint to i64 when
///   `ptrCastNeeded` is true.
/// - Otherwise, when a profile record has been loaded and it still has an
///   unused site of this kind, `valueSite` is annotated from that record via
///   llvm::annotateValueSite().
///
/// In both cases the per-kind site counter is advanced by one after a site
/// has been handled. A null `value` or `valueSite` is ignored. For
/// LLVM < 3.9 the function body is empty.
void CodeGenPGO::valueProfile(uint32_t valueKind, llvm::Instruction *valueSite,
                              llvm::Value *value, bool ptrCastNeeded) {
#if LDC_LLVM_VER >= 309
  if (value == nullptr || valueSite == nullptr)
    return;

  if (global.params.genInstrProf && emitInstrumentation && RegionCounterMap) {
    auto &irb = gIR->ir;

    // The profiling call has to land immediately ahead of the value site, so
    // remember where the builder currently is and move it there temporarily.
    const auto previousIP = irb->saveIP();
    irb->SetInsertPoint(valueSite);

    llvm::Value *profiledValue = value;
    if (ptrCastNeeded)
      profiledValue = irb->CreatePtrToInt(profiledValue, irb->getInt64Ty());

    auto &siteIndex = NumValueSites[valueKind];
    llvm::Value *Args[5] = {
        llvm::ConstantExpr::getBitCast(
            FuncNameVar, llvm::Type::getInt8PtrTy(gIR->context())),
        irb->getInt64(FunctionHash), profiledValue,
        irb->getInt32(valueKind), irb->getInt32(siteIndex)};
    irb->CreateCall(GET_INTRINSIC_DECL(instrprof_value_profile), Args);

    // Put the builder back where the caller left it.
    irb->restoreIP(previousIP);

    ++siteIndex;
    return;
  }

  // Not instrumenting: annotate from loaded profile data, if there is any.
  if (!ProfRecord)
    return;

  auto &siteIndex = NumValueSites[valueKind];

  // The record holds no data for this site index; leave the site untouched.
  if (siteIndex >= ProfRecord->getNumValueSites(valueKind))
    return;

  llvm::annotateValueSite(gIR->module, *valueSite, *ProfRecord,
                          static_cast<llvm::InstrProfValueKind>(valueKind),
                          siteIndex);
  ++siteIndex;
#endif // LLVM >= 3.9
}

#endif // LDC_WITH_PGO
37 changes: 36 additions & 1 deletion gen/pgo.h
Expand Up @@ -19,8 +19,10 @@
#define LDC_GEN_PGO_H

#include "gen/llvm.h"
#include "llvm/ProfileData/InstrProf.h"
#include <string>
#include <vector>
#include <array>

namespace llvm {
class GlobalVariable;
Expand Down Expand Up @@ -79,14 +81,26 @@ class CodeGenPGO {
static InstTy *addBranchWeights(InstTy *I, llvm::MDNode *) {
return I;
}

/// No-op stand-in for indirect call profiling: the body is empty, so both
/// arguments are ignored.
/// NOTE(review): presumably this is the variant used when PGO support is
/// compiled out -- confirm against the enclosing preprocessor condition.
void emitIndirectCallPGO(llvm::Instruction *callSite, llvm::Value *funcPtr) {}

/// No-op stand-in for value profiling: the body is empty, so all arguments
/// are ignored.
/// NOTE(review): presumably this is the variant used when PGO support is
/// compiled out -- confirm against the enclosing preprocessor condition.
void valueProfile(uint32_t valueKind, llvm::Instruction *valueSite,
llvm::Value *value, bool ptrCastNeeded) {}
};

#else

/// Keeps per-function PGO state.
class CodeGenPGO {
public:
CodeGenPGO() : NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {}
/// Creates PGO state with all counters set to zero. When building against
/// LLVM >= 3.9, the per-kind value site counters are zeroed as well.
CodeGenPGO()
: NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0)
#if LDC_LLVM_VER >= 309
,
// `{{0}}` sets the first element explicitly; the rest are value-initialized.
NumValueSites({{0}})
#endif
{
}

/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been
Expand Down Expand Up @@ -158,6 +172,22 @@ class CodeGenPGO {
return I;
}

/// Adds profiling instrumentation or profile annotation for the indirect
/// call through `funcPtr` at call site `callSite`.
/// Does nothing for LLVM < 3.9.
void emitIndirectCallPGO(llvm::Instruction *callSite, llvm::Value *funcPtr);

/// Adds profiling instrumentation or profile annotation for a value.
/// This method either inserts a call to the profile run-time during
/// instrumentation, or attaches profile data as metadata for PGO use.
/// \param valueKind      Kind of the profiled value.
/// \param valueSite      IR instruction the profiling belongs to;
///                       instrumentation is inserted right before it.
/// \param value          The value to profile. It should be of LLVM i64 type,
///                       unless `ptrCastNeeded` is true.
/// \param ptrCastNeeded  If true, a ptrtoint cast of `value` to i64 is added.
/// Does nothing for LLVM < 3.9.
void valueProfile(uint32_t valueKind, llvm::Instruction *valueSite,
llvm::Value *value, bool ptrCastNeeded);

private:
std::string FuncName;
llvm::GlobalVariable *FuncNameVar;
Expand All @@ -170,6 +200,11 @@ class CodeGenPGO {
std::vector<uint64_t> RegionCounts;
uint64_t CurrentRegionCount;

#if LDC_LLVM_VER >= 309
/// Number of value sites handled so far, indexed by value kind (one slot for
/// every kind up to and including llvm::IPVK_Last).
std::array<unsigned, llvm::IPVK_Last + 1> NumValueSites;
/// Profile record of this function; stays null until profile data has been
/// loaded successfully.
std::unique_ptr<llvm::InstrProfRecord> ProfRecord;
#endif

/// \brief A flag that is set to false when instrumentation code should not be
/// emitted for this function.
bool emitInstrumentation = true;
Expand Down
9 changes: 9 additions & 0 deletions gen/tocall.cpp
Expand Up @@ -887,6 +887,15 @@ DValue *DtoCallFunction(Loc &loc, Type *resulttype, DValue *fnval,
// call the function
LLCallSite call = gIR->func()->scopes->callOrInvoke(callable, args);

#if LDC_LLVM_VER >= 309
// PGO: Insert instrumentation or attach profile metadata at indirect call
// sites.
if (!call.getCalledFunction()) {
auto &PGO = gIR->func()->pgo;
PGO.emitIndirectCallPGO(call.getInstruction(), callable);
}
#endif

// get return value
const int sretArgIndex =
(irFty.arg_sret && irFty.arg_this && gABI->passThisBeforeSret(tf) ? 1
Expand Down
67 changes: 67 additions & 0 deletions tests/PGO/indirect_calls.d
@@ -0,0 +1,67 @@
// Test instrumentation of indirect calls

// REQUIRES: atleast_llvm309

// RUN: %ldc -c -output-ll -fprofile-instr-generate -of=%t.ll %s && FileCheck %s --check-prefix=PROFGEN < %t.ll

// RUN: %ldc -fprofile-instr-generate=%t.profraw -run %s \
// RUN: && %profdata merge %t.profraw -o %t.profdata \
// RUN: && %ldc -O3 -c -output-ll -of=%t2.ll -fprofile-instr-use=%t.profdata %s \
// RUN: && FileCheck %s -check-prefix=PROFUSE < %t2.ll

import ldc.attributes : weak;

extern (C)
{ // simplify name mangling for simpler string matching

// Call target that select_func installs for i < 1700, i.e. for 1700 of the
// 2000 loop iterations in main.
@weak // disable reasoning about this function
void hot()
{
}

// Call target that select_func installs for 1700 <= i < 1990 (290 iterations).
void luke()
{
}

// Call target that select_func installs for i >= 1990 (the last 10 iterations).
void cold()
{
}

void function() foo;

// Points the global function pointer `foo` at hot, luke or cold depending on
// `i`, so that the indirect call in main sees a skewed target distribution.
@weak // disable reasoning about this function
void select_func(int i)
{
if (i < 1700)
foo = &hot;
else if (i < 1990)
foo = &luke;
else
foo = &cold;
}

} // extern C

// PROFGEN-LABEL: @_Dmain(
// PROFUSE-LABEL: @_Dmain(
// Test driver: retargets `foo` and calls through it 2000 times. The check
// lines inside the loop describe the IR expected around that indirect call.
int main()
{
// `i` is default-initialized to 0 in D.
for (int i; i < 2000; ++i)
{
select_func(i);

// Instrumented build: the loaded pointer is cast to i64 and passed to the
// profiling run-time immediately before the indirect call.
// PROFGEN: [[REG1:%[0-9]+]] = load void ()*, void ()** @foo
// PROFGEN-NEXT: [[REG2:%[0-9]+]] = ptrtoint void ()* [[REG1]] to i64
// PROFGEN-NEXT: call void @__llvm_profile_instrument_target(i64 [[REG2]], i8* bitcast ({{.*}}_Dmain to i8*), i32 0)
// PROFGEN-NEXT: call void [[REG1]]()

// Profile-guided build: the pointer is compared against @hot, which is then
// called directly, with the indirect call kept as the fallback.
// PROFUSE: [[REG1:%[0-9]+]] = load void ()*, void ()** @foo
// PROFUSE: [[REG2:%[0-9]+]] = icmp eq void ()* [[REG1]], @hot
// PROFUSE: call void @hot()
// PROFUSE: call void [[REG1]]()

foo();
}

return 0;
}

0 comments on commit be63398

Please sign in to comment.