64 changes: 64 additions & 0 deletions llvm/tools/llvm-profgen/CallContext.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H

#include "llvm/ProfileData/SampleProf.h"
#include <sstream>
#include <string>
#include <vector>

namespace llvm {
namespace sampleprof {

// One frame of a call context: the function name paired with the
// LineLocation (line offset and discriminator) inside that function.
using FrameLocation = std::pair<std::string, LineLocation>;

// A sequence of frames that together describe a call context.
using FrameLocationStack = SmallVector<FrameLocation, 4>;

inline std::string getCallSite(const FrameLocation &Callsite) {
std::string CallsiteStr = Callsite.first;
CallsiteStr += ":";
CallsiteStr += Twine(Callsite.second.LineOffset).str();
if (Callsite.second.Discriminator > 0) {
CallsiteStr += ".";
CallsiteStr += Twine(Callsite.second.Discriminator).str();
}
return CallsiteStr;
}

// Join every frame of Context, in stored order, into one string with " @ "
// between consecutive frames. An empty context yields an empty string.
// TODO: This operation is expensive. If it ever gets called multiple times we
// may think of making a class wrapper with internal states for it.
inline std::string getLocWithContext(const FrameLocationStack &Context) {
  std::string ContextStr;
  for (const auto &Callsite : Context) {
    // Test the accumulated string directly; asking a string stream for its
    // contents would copy everything written so far on every iteration.
    if (!ContextStr.empty())
      ContextStr += " @ ";
    ContextStr += getCallSite(Callsite);
  }
  return ContextStr;
}

// Reverse call context, i.e., in the order of callee frames to caller frames,
// is useful during instruction printing or pseudo probe printing.
// Frames are visited from the last element of Context to the first and joined
// with " @ ". An empty context yields an empty string.
inline std::string
getReversedLocWithContext(const FrameLocationStack &Context) {
  std::string ContextStr;
  for (const auto &Callsite : reverse(Context)) {
    // Test the accumulated string directly; asking a string stream for its
    // contents would copy everything written so far on every iteration.
    if (!ContextStr.empty())
      ContextStr += " @ ";
    ContextStr += getCallSite(Callsite);
  }
  return ContextStr;
}

} // end namespace sampleprof
} // end namespace llvm

#endif
48 changes: 48 additions & 0 deletions llvm/tools/llvm-profgen/ErrorHandling.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H

#include "llvm/ADT/Twine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/WithColor.h"
#include <system_error>

using namespace llvm;

// Report a fatal error on the error stream and terminate the process with
// EXIT_FAILURE. The output is the "llvm-profgen" error prefix, then
// "<Whence>: " when Whence is non-empty, then Message. A non-empty Hint is
// emitted afterwards as a note.
LLVM_ATTRIBUTE_NORETURN inline void
exitWithError(const Twine &Message, StringRef Whence = StringRef(),
              StringRef Hint = StringRef()) {
  raw_ostream &ErrStream = errs();
  WithColor::error(ErrStream, "llvm-profgen");
  if (!Whence.empty())
    ErrStream << Whence.str() << ": ";
  ErrStream << Message << "\n";
  if (!Hint.empty())
    WithColor::note() << Hint.str() << "\n";
  ::exit(EXIT_FAILURE);
}

// Overload taking a std::error_code: reports EC's message text and exits.
LLVM_ATTRIBUTE_NORETURN inline void
exitWithError(std::error_code EC, StringRef Whence = StringRef()) {
  const std::string Description = EC.message();
  exitWithError(Description, Whence);
}

// Overload taking an llvm::Error: the error is consumed by converting it to a
// std::error_code, which is then reported through the overload above.
LLVM_ATTRIBUTE_NORETURN inline void exitWithError(Error E, StringRef Whence) {
  std::error_code EC = errorToErrorCode(std::move(E));
  exitWithError(EC, Whence);
}

// Return the value held by EO. If EO holds an error instead, report it via
// exitWithError (forwarding Args, e.g. the file name) and never return.
template <typename T, typename... Ts>
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
  if (!EO)
    exitWithError(EO.takeError(), std::forward<Ts>(Args)...);
  return std::move(*EO);
}
#endif
21 changes: 21 additions & 0 deletions llvm/tools/llvm-profgen/LLVMBuild.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
;===- ./tools/llvm-profgen/LLVMBuild.txt ----------------------*- Conf -*--===;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Tool
name = llvm-profgen
parent = Tools
required_libraries = DebugInfoDWARF MC MCDisassembler MCParser Object all-targets Demangle Support
131 changes: 131 additions & 0 deletions llvm/tools/llvm-profgen/PerfReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "PerfReader.h"

// Hidden command-line flag, off by default. When set, parseMMap2Event prints
// a line for every PERF_RECORD_MMAP2 event it parses.
static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
cl::init(false), cl::ZeroOrMore,
cl::desc("Print binary load events."));

namespace llvm {
namespace sampleprof {

// Construct the reader by loading every binary in BinaryFilenames. Name
// conflicts between those binaries are not allowed and are reported as a
// fatal error by loadBinary.
PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames) {
  // Load the binaries. Iterate by reference so each path is not copied.
  for (const auto &Filename : BinaryFilenames)
    loadBinary(Filename, /*AllowNameConflict*/ false);
}

// Register the binary at BinaryPath in the binary table and return the table
// entry. If an entry with the same file name already exists, that existing
// entry is returned when AllowNameConflict is true; otherwise the process
// exits with a "Binary name conflict" error.
ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
                                       bool AllowNameConflict) {
  // The binary table is currently indexed by the binary name not the full
  // binary path. This is because the user-given path may not match the one
  // that was actually executed.
  StringRef BinaryName = llvm::sys::path::filename(BinaryPath);

  // try_emplace builds the ProfiledBinary (whose ctor loads the binary)
  // directly inside the table, and only when no entry with this name exists.
  // This avoids loading a duplicate just to throw it away and avoids moving
  // a fully-loaded ProfiledBinary into the map.
  auto Ret = BinaryTable.try_emplace(BinaryName, BinaryPath);

  if (!Ret.second && !AllowNameConflict) {
    std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
                           " and " + Ret.first->second.getPath().str() + " \n";
    exitWithError(ErrorMsg);
  }

  return Ret.first->second;
}

void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
// Load the binary.
StringRef BinaryPath = Event.BinaryPath;
StringRef BinaryName = llvm::sys::path::filename(BinaryPath);

auto I = BinaryTable.find(BinaryName);
// Drop the event which doesn't belong to user-provided binaries
// or if its image is loaded at the same address
if (I == BinaryTable.end() || Event.BaseAddress == I->second.getBaseAddress())
return;

ProfiledBinary &Binary = I->second;

// A binary image could be uploaded and then reloaded at different
// place, so update the address map here
AddrToBinaryMap.erase(Binary.getBaseAddress());
AddrToBinaryMap[Event.BaseAddress] = &Binary;

// Update binary load address.
Binary.setBaseAddress(Event.BaseAddress);
}

// Parse the current trace line as a PERF_RECORD_MMAP2 record, forward the
// decoded event to updateBinaryAddress, and optionally print it. A line that
// does not match the expected layout is a fatal error.
void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
  // Example of an accepted line:
  // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
  // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
  constexpr static const char *const Pattern =
      "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
      "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
      "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
  // Indices of the regex capture groups within the match result.
  enum EventIndex {
    WHOLE_LINE = 0,   // the entire matched line
    PID = 1,          // process id
    BASE_ADDRESS = 2, // base address of the mapping
    MMAPPED_SIZE = 3, // mmapped size
    PAGE_OFFSET = 4,  // page offset
    BINARY_PATH = 5   // path of the mapped binary
  };

  Regex RegMmap2(Pattern);
  SmallVector<StringRef, 6> Fields;
  if (!RegMmap2.match(TraceIt.getCurrentLine(), &Fields)) {
    std::string ErrorMsg = "Cannot parse mmap event: Line";
    ErrorMsg += Twine(TraceIt.getLineNumber()).str();
    ErrorMsg += ": ";
    ErrorMsg += TraceIt.getCurrentLine().str();
    ErrorMsg += " \n";
    exitWithError(ErrorMsg);
  }

  // Decode the captured fields. The PID is parsed as decimal; the other
  // numbers use radix 0 so that the "0x" prefix selects hexadecimal.
  MMapEvent Event;
  Fields[PID].getAsInteger(10, Event.PID);
  Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress);
  Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
  Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
  Event.BinaryPath = Fields[BINARY_PATH];

  updateBinaryAddress(Event);

  if (ShowMmapEvents) {
    outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
           << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n";
  }
}

// Handle the current trace line and move on to the next one. Only lines that
// start with "PERF_RECORD_MMAP2" are parsed; every other line is skipped.
void PerfReader::parseEvent(TraceStream &TraceIt) {
  StringRef Line = TraceIt.getCurrentLine();
  if (Line.startswith("PERF_RECORD_MMAP2"))
    parseMMap2Event(TraceIt);

  TraceIt.advance();
}

// Read the perf script file Filename line by line, handing each line to
// parseEvent until the end of the file is reached.
void PerfReader::parseTrace(StringRef Filename) {
  // parseEvent advances the stream itself, so the loop has no step clause.
  for (TraceStream TraceIt(Filename); !TraceIt.isAtEoF();)
    parseEvent(TraceIt);
}

// Parse every perf script file named in PerfTraceFilenames, in order.
void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
  // Parse perf traces. Iterate by reference so each path is not copied.
  for (const auto &Filename : PerfTraceFilenames)
    parseTrace(Filename);
}

} // end namespace sampleprof
} // end namespace llvm
102 changes: 102 additions & 0 deletions llvm/tools/llvm-profgen/PerfReader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <fstream>
#include <list>
#include <map>
#include <vector>

using namespace llvm;
using namespace sampleprof;

namespace llvm {
namespace sampleprof {

// Stream based trace line iterator.
//
// Opens the file on construction and immediately reads the first line. The
// current line stays available through getCurrentLine() until advance() is
// called again. Failure to open the file is a fatal error.
class TraceStream {
  // Text of the line most recently read by advance().
  std::string CurrentLine;
  std::ifstream Fin;
  // Set once a read attempt fails, i.e. there are no more lines.
  bool IsAtEoF = false;
  // Number of lines read successfully so far, so the first line is line 1.
  uint64_t LineNumber = 0;

public:
  TraceStream(StringRef Filename) : Fin(Filename.str()) {
    if (!Fin.good())
      exitWithError("Error read input perf script file", Filename);
    advance();
  }

  // Return a view of the current line. Must not be called at end of file.
  // The view refers to internal storage that advance() overwrites.
  StringRef getCurrentLine() const {
    assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
    return CurrentLine;
  }

  // Line number of the current line (1-based).
  uint64_t getLineNumber() const { return LineNumber; }

  // True once advance() has failed to read another line.
  bool isAtEoF() const { return IsAtEoF; }

  // Read the next line. On failure only the end-of-file flag is set and the
  // line counter is left unchanged.
  void advance() {
    if (!std::getline(Fin, CurrentLine)) {
      IsAtEoF = true;
      return;
    }
    LineNumber++;
  }
};

// Filename to binary map. PerfReader keys it by the file-name component of
// the binary path rather than by the full path.
using BinaryMap = StringMap<ProfiledBinary>;
// Address to binary map for fast look-up. The mapped pointers are non-owning.
using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;

// Load binaries and read perf trace to parse the events and samples
class PerfReader {

// Loaded binaries, keyed by binary file name.
BinaryMap BinaryTable;
AddrToBinaryMap; // Used by address-based lookup.

// The parsed MMap event
struct MMapEvent {
pid_t PID = 0;
uint64_t BaseAddress = 0;
uint64_t Size = 0;
uint64_t Offset = 0;
// Non-owning view of the path text; parseMMap2Event points it into the
// trace line being parsed.
StringRef BinaryPath;
};

/// Load symbols and disassemble the code of a given binary.
/// Also register the binary in the binary table.
/// If a binary with the same file name is already registered and
/// AllowNameConflict is false, the process exits with an error.
///
ProfiledBinary &loadBinary(const StringRef BinaryPath,
bool AllowNameConflict = true);
/// Record the base address carried by Event on the matching loaded binary
/// and update the address-to-binary map accordingly.
void updateBinaryAddress(const MMapEvent &Event);

public:
/// Load every binary listed in BinaryFilenames.
PerfReader(cl::list<std::string> &BinaryFilenames);

/// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
/// mapping between the binary name and its memory layout.
///
void parseMMap2Event(TraceStream &TraceIt);
/// Process the current line of TraceIt and advance to the next line.
void parseEvent(TraceStream &TraceIt);
// Parse perf events and samples
void parseTrace(StringRef Filename);
/// Parse each perf script file listed in PerfTraceFilenames.
void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
};

} // end namespace sampleprof
} // end namespace llvm

#endif
312 changes: 312 additions & 0 deletions llvm/tools/llvm-profgen/ProfiledBinary.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ProfiledBinary.h"
#include "ErrorHandling.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/X86TargetParser.h"

#define DEBUG_TYPE "load-binary"

using namespace llvm;

// Hidden command-line flag, off by default. When set, the disassembly of each
// symbol is printed while the binary is being loaded.
static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
cl::init(false), cl::ZeroOrMore,
cl::desc("Print disassembled code."));

// Hidden command-line flag, off by default. It is only consulted while
// ShowDisassembly output is being produced, where it appends the symbolized
// call context to each printed instruction.
static cl::opt<bool> ShowSourceLocations("show-source-locations",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore,
cl::desc("Print source locations."));

namespace llvm {
namespace sampleprof {

// Look up the registered Target matching Obj's triple. Exits with the lookup
// error, attributed to Obj's file name, when no target is found.
static const Target *getTarget(const ObjectFile *Obj) {
  std::string LookupError;
  std::string ArchName; // Intentionally left empty.
  Triple ObjTriple = Obj->makeTriple();
  const Target *Found =
      TargetRegistry::lookupTarget(ArchName, ObjTriple, LookupError);
  if (!Found)
    exitWithError(LookupError, Obj->getFileName());
  return Found;
}

// Search for a PT_LOAD segment whose [p_vaddr, p_vaddr + p_memsz) range
// contains the address of Sec, and return that segment's p_paddr rounded down
// using p_align as the image load address for the section. Returns 0 when no
// such segment exists.
template <class ELFT>
static uint64_t getELFImageLMAForSec(const ELFFile<ELFT> *Obj,
                                     const object::ELFSectionRef &Sec,
                                     StringRef FileName) {
  const auto &PhdrRange = unwrapOrError(Obj->program_headers(), FileName);
  const uint64_t SecAddr = Sec.getAddress();
  for (const typename ELFT::Phdr &Phdr : PhdrRange) {
    if (Phdr.p_type != ELF::PT_LOAD)
      continue;
    // Skip segments that start after the section address...
    if (Phdr.p_vaddr > SecAddr)
      continue;
    // ...or that end at or before it.
    if (Phdr.p_vaddr + Phdr.p_memsz <= SecAddr)
      continue;
    // Segments will always be loaded at a page boundary.
    return Phdr.p_paddr & ~(Phdr.p_align - 1U);
  }
  return 0;
}

// Get the image load address for a specific section. Note that an image is
// loaded by segments (a group of sections) and segments may not be consecutive
// in memory.
// This overload only selects the concrete ELF flavour (32/64-bit, little/big
// endian) of the section's object file and forwards to the templated helper.
static uint64_t getELFImageLMAForSec(const object::ELFSectionRef &Sec) {
  const auto *Owner = Sec.getObject();

  if (const auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Owner))
    return getELFImageLMAForSec(ELF32LE->getELFFile(), Sec,
                                ELF32LE->getFileName());

  if (const auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Owner))
    return getELFImageLMAForSec(ELF32BE->getELFFile(), Sec,
                                ELF32BE->getFileName());

  if (const auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Owner))
    return getELFImageLMAForSec(ELF64LE->getELFFile(), Sec,
                                ELF64LE->getFileName());

  // The only remaining flavour; cast<> asserts if that is not the case.
  const auto *ELF64BE = cast<ELF64BEObjectFile>(Owner);
  return getELFImageLMAForSec(ELF64BE->getELFFile(), Sec,
                              ELF64BE->getFileName());
}

// Open the binary at Path, check that it is an X86 ELF image, then compute the
// preferred base address and disassemble its text sections. Any failure is
// reported through exitWithError.
void ProfiledBinary::load() {
  // Attempt to open the binary.
  OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);

  // Only ELF object files are accepted.
  auto *Obj = dyn_cast<ELFObjectFileBase>(OBinary.getBinary());
  if (!Obj)
    exitWithError("not a valid Elf image", Path);

  // Currently only X86 is supported.
  TheTriple = Obj->makeTriple();
  if (!TheTriple.isX86())
    exitWithError("unsupported target", TheTriple.getTriple());
  LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");

  // Find the preferred base address for text sections.
  setPreferredBaseAddress(Obj);

  // Disassemble the text sections.
  disassemble(Obj);

  // TODO: decode other sections.
}

void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) {
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
if (Section.isText()) {
PreferredBaseAddress = getELFImageLMAForSec(Section);
return;
}
}
exitWithError("no text section found", Obj->getFileName());
}

bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
SectionSymbolsTy &Symbols,
const SectionRef &Section) {

std::size_t SE = Symbols.size();
uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
uint64_t SectSize = Section.getSize();
uint64_t StartOffset = Symbols[SI].Addr - PreferredBaseAddress;
uint64_t EndOffset = (SI + 1 < SE)
? Symbols[SI + 1].Addr - PreferredBaseAddress
: SectionOffset + SectSize;
if (StartOffset >= EndOffset)
return true;

std::string &&SymbolName = Symbols[SI].Name.str();
if (ShowDisassembly)
outs() << '<' << SymbolName << ">:\n";

uint64_t Offset = StartOffset;
while (Offset < EndOffset) {
MCInst Inst;
uint64_t Size;
// Disassemble an instruction.
if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
Offset + PreferredBaseAddress, nulls()))
return false;

if (ShowDisassembly) {
outs() << format("%8" PRIx64 ":", Offset);
size_t Start = outs().tell();
IP->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
if (ShowSourceLocations) {
unsigned Cur = outs().tell() - Start;
if (Cur < 40)
outs().indent(40 - Cur);
InstructionPointer Inst(this, Offset);
outs() << getReversedLocWithContext(symbolize(Inst));
}
outs() << "\n";
}

const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());

// Populate address maps.
CodeAddrs.push_back(Offset);
if (MCDesc.isCall())
CallAddrs.insert(Offset);
else if (MCDesc.isReturn())
RetAddrs.insert(Offset);

Offset += Size;
}

if (ShowDisassembly)
outs() << "\n";

FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str();
return true;
}

// Create the MC components needed to disassemble Obj (register info, asm
// info, subtarget info, instruction info, disassembler, instruction analysis
// and instruction printer) and store them in the corresponding members.
// Exits with an error naming the target triple if a required component cannot
// be created.
void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
  const Target *TheTarget = getTarget(Obj);
  std::string TripleName = TheTriple.getTriple();
  StringRef FileName = Obj->getFileName();

  MRI.reset(TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    exitWithError("no register info for target " + TripleName, FileName);

  MCTargetOptions MCOptions;
  AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  if (!AsmInfo)
    exitWithError("no assembly info for target " + TripleName, FileName);

  SubtargetFeatures Features = Obj->getFeatures();
  STI.reset(
      TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
  if (!STI)
    exitWithError("no subtarget info for target " + TripleName, FileName);

  MII.reset(TheTarget->createMCInstrInfo());
  if (!MII)
    exitWithError("no instruction info for target " + TripleName, FileName);

  // NOTE(review): MOFI and Ctx are function-local, yet DisAsm is created from
  // Ctx and outlives this function. If the disassembler keeps a reference to
  // Ctx, that reference dangles after return — confirm, and consider making
  // them members.
  MCObjectFileInfo MOFI;
  MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
  MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
  DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
  if (!DisAsm)
    exitWithError("no disassembler for target " + TripleName, FileName);

  // MIA is deliberately left unchecked, as before; it is not dereferenced in
  // this function.
  MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  IP.reset(TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant,
                                          *AsmInfo, *MII, *MRI));
  // The printer is dereferenced right below, so fail cleanly instead of
  // crashing when the target provides none.
  if (!IP)
    exitWithError("no instruction printer for target " + TripleName, FileName);
  IP->setPrintBranchImmAsAddress(true);
}

void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
// Set up disassembler and related components.
setUpDisassembler(Obj);

// Create a mapping from virtual address to symbol name. The symbols in text
// sections are the candidates to dissassemble.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
}

// Sort all the symbols. Use a stable sort to stabilize the output.
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
stable_sort(SecSyms.second);

if (ShowDisassembly)
outs() << "\nDisassembly of " << FileName << ":\n";

// Dissassemble a text section.
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
if (!Section.isText())
continue;

uint64_t ImageLoadAddr = PreferredBaseAddress;
uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;

// Register the text section.
TextSections.insert({SectionOffset, SectSize});

if (ShowDisassembly) {
StringRef SectionName = unwrapOrError(Section.getName(), FileName);
outs() << "\nDisassembly of section " << SectionName;
outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", "
<< format("0x%" PRIx64, SectionOffset + SectSize) << "]:\n\n";
}

// Get the section data.
ArrayRef<uint8_t> Bytes =
arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));

// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];

// Disassemble symbol by symbol.
for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
exitWithError("disassembling error", FileName);
}
}
}

void ProfiledBinary::setupSymbolizer() {
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
SymbolizerOpts.PrintFunctions =
DILineInfoSpecifier::FunctionNameKind::LinkageName;
SymbolizerOpts.Demangle = false;
SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
SymbolizerOpts.UseSymbolTable = false;
SymbolizerOpts.RelativeAddresses = false;
Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
}

// Symbolize the instruction at IP and return its inline call context. Each
// returned frame holds a function name, the line offset from that function's
// start line, and the discriminator. The inlining frames are visited from the
// highest index down to 0, stopping at the first frame named "<invalid>".
FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP) {
  assert(this == IP.Binary &&
         "Binary should only symbolize its own instruction");

  // The symbolizer expects the address based on the preferred base address.
  object::SectionedAddress Addr{IP.Offset + PreferredBaseAddress,
                                object::SectionedAddress::UndefSection};
  DIInliningInfo InlineStack =
      unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());

  FrameLocationStack CallStack;

  int32_t I = InlineStack.getNumberOfFrames() - 1;
  while (I >= 0) {
    const auto &Frame = InlineStack.getFrame(I);
    if (Frame.FunctionName == "<invalid>")
      break;
    CallStack.push_back(FrameLocation(
        Frame.FunctionName,
        LineLocation(Frame.Line - Frame.StartLine, Frame.Discriminator)));
    --I;
  }

  return CallStack;
}

} // end namespace sampleprof
} // end namespace llvm
126 changes: 126 additions & 0 deletions llvm/tools/llvm-profgen/ProfiledBinary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H

#include "CallContext.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Path.h"
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using namespace llvm::object;

namespace llvm {
namespace sampleprof {

class ProfiledBinary;

// Identifies a single instruction of a profiled binary.
struct InstructionPointer {
  // The binary the instruction belongs to (not owned).
  ProfiledBinary *Binary;
  // Offset to the base address of the executable segment of the binary.
  uint64_t Offset;
  // Index to the sorted code address array of the binary. The constructor
  // always starts it at 0.
  uint64_t Index;

  InstructionPointer(ProfiledBinary *Binary, uint64_t Offset)
      : Binary(Binary), Offset(Offset), Index(0) {}
};

// Represents one binary being profiled. Constructing it opens the file,
// validates it and disassembles its text sections, building the address maps
// below. Failures during loading terminate the process via exitWithError.
class ProfiledBinary {
// Path of the binary, as given to the constructor.
std::string Path;
// The target triple. Assigned by load().
Triple TheTriple;
// The runtime base address that the executable sections are loaded at.
mutable uint64_t BaseAddress = 0;
// The preferred base address that the executable sections are loaded at.
uint64_t PreferredBaseAddress = 0;
// Multiple MC component info
std::unique_ptr<const MCRegisterInfo> MRI;
std::unique_ptr<const MCAsmInfo> AsmInfo;
std::unique_ptr<const MCSubtargetInfo> STI;
std::unique_ptr<const MCInstrInfo> MII;
std::unique_ptr<MCDisassembler> DisAsm;
std::unique_ptr<const MCInstrAnalysis> MIA;
std::unique_ptr<MCInstPrinter> IP;
// A list of text sections sorted by start RVA and size. Used to check
// if a given RVA is a valid code address.
std::set<std::pair<uint64_t, uint64_t>> TextSections;
// Function offset to name mapping.
std::unordered_map<uint64_t, std::string> FuncStartAddrMap;
// An array of offsets of all instructions sorted in increasing order. The
// sorting is needed to fast advance to the next forward/backward instruction.
std::vector<uint64_t> CodeAddrs;
// A set of call instruction offsets. Used by virtual unwinding.
std::unordered_set<uint64_t> CallAddrs;
// A set of return instruction offsets. Used by virtual unwinding.
std::unordered_set<uint64_t> RetAddrs;

// The symbolizer used to get inline context for an instruction.
std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;

// Set PreferredBaseAddress from the first text section of the object file.
void setPreferredBaseAddress(const ELFObjectFileBase *O);

// Set up disassembler and related components.
void setUpDisassembler(const ELFObjectFileBase *Obj);
// Create the symbolizer stored in Symbolizer.
void setupSymbolizer();

/// Disassemble the text section and build various address maps.
void disassemble(const ELFObjectFileBase *O);

/// Helper function to disassemble the symbol and extract info for unwinding
bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
SectionSymbolsTy &Symbols, const SectionRef &Section);
/// Symbolize a given instruction pointer and return a full call context.
FrameLocationStack symbolize(const InstructionPointer &I);

/// Decode the interesting parts of the binary and build internal data
/// structures. On high level, the parts of interest are:
/// 1. Text sections, including the main code section and the PLT
/// entries that will be used to handle cross-module call transitions.
/// 2. The .debug_line section, used by Dwarf-based profile generation.
/// 3. Pseudo probe related sections, used by probe-based profile
/// generation.
void load();

public:
// Create the symbolizer, then load and disassemble the binary at Path.
// NOTE(review): setupSymbolizer() runs before load() assigns TheTriple, so
// the symbolizer's DefaultArch is taken from a default-constructed triple —
// confirm this ordering is intended.
ProfiledBinary(StringRef Path) : Path(Path) {
setupSymbolizer();
load();
}

// Path of the binary as given at construction.
const StringRef getPath() const { return Path; }
// File-name component of the path.
const StringRef getName() const { return llvm::sys::path::filename(Path); }
// Runtime base address; 0 until setBaseAddress is called.
uint64_t getBaseAddress() const { return BaseAddress; }
void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
};

} // end namespace sampleprof
} // end namespace llvm

#endif
53 changes: 53 additions & 0 deletions llvm/tools/llvm-profgen/llvm-profgen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===- llvm-profgen.cpp - LLVM SPGO profile generation tool ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// llvm-profgen generates SPGO profiles from perf script output.
//
//===----------------------------------------------------------------------===//

#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"

// --perfscript: one or more perf script trace files; several may be given as
// a comma-separated list.
static cl::list<std::string> PerfTraceFilenames(
"perfscript", cl::value_desc("perfscript"), cl::OneOrMore,
llvm::cl::MiscFlags::CommaSeparated,
cl::desc("Path of perf-script trace created by Linux perf tool with "
"`script` command(the raw perf.data should be profiled with -b)"));

// --binary: one or more profiled binaries; several may be given as a
// comma-separated list.
static cl::list<std::string>
BinaryFilenames("binary", cl::value_desc("binary"), cl::OneOrMore,
llvm::cl::MiscFlags::CommaSeparated,
cl::desc("Path of profiled binary files"));

// --output: required path of the output profile file.
static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::Required,
cl::desc("Output profile file"));

using namespace llvm;
using namespace sampleprof;

// Tool entry point: parse the command line, register the LLVM targets, load
// the requested binaries and parse the perf traces against them.
int main(int argc, const char *argv[]) {
  InitLLVM InitGuard(argc, argv);

  cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");

  // Register target infos, MC layers and disassemblers for all targets
  // before any binary is loaded.
  InitializeAllTargetInfos();
  InitializeAllTargetMCs();
  InitializeAllDisassemblers();

  // Constructing the reader loads the binaries; the traces are parsed next.
  PerfReader Reader(BinaryFilenames);
  Reader.parsePerfTraces(PerfTraceFilenames);

  return EXIT_SUCCESS;
}