| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| //===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H | ||
| #define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H | ||
|
|
||
| #include "llvm/ProfileData/SampleProf.h" | ||
| #include <sstream> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| namespace llvm { | ||
| namespace sampleprof { | ||
|
|
||
| // Function name, LineLocation | ||
| typedef std::pair<std::string, LineLocation> FrameLocation; | ||
|
|
||
| typedef SmallVector<FrameLocation, 4> FrameLocationStack; | ||
|
|
||
| inline std::string getCallSite(const FrameLocation &Callsite) { | ||
| std::string CallsiteStr = Callsite.first; | ||
| CallsiteStr += ":"; | ||
| CallsiteStr += Twine(Callsite.second.LineOffset).str(); | ||
| if (Callsite.second.Discriminator > 0) { | ||
| CallsiteStr += "."; | ||
| CallsiteStr += Twine(Callsite.second.Discriminator).str(); | ||
| } | ||
| return CallsiteStr; | ||
| } | ||
|
|
||
| // TODO: This operation is expansive. If it ever gets called multiple times we | ||
| // may think of making a class wrapper with internal states for it. | ||
| inline std::string getLocWithContext(const FrameLocationStack &Context) { | ||
| std::ostringstream OContextStr; | ||
| for (const auto &Callsite : Context) { | ||
| if (OContextStr.str().size()) | ||
| OContextStr << " @ "; | ||
| OContextStr << getCallSite(Callsite); | ||
| } | ||
| return OContextStr.str(); | ||
| } | ||
|
|
||
| // Reverse call context, i.e., in the order of callee frames to caller frames, | ||
| // is useful during instruction printing or pseudo probe printing. | ||
| inline std::string | ||
| getReversedLocWithContext(const FrameLocationStack &Context) { | ||
| std::ostringstream OContextStr; | ||
| for (const auto &Callsite : reverse(Context)) { | ||
| if (OContextStr.str().size()) | ||
| OContextStr << " @ "; | ||
| OContextStr << getCallSite(Callsite); | ||
| } | ||
| return OContextStr.str(); | ||
| } | ||
|
|
||
| } // end namespace sampleprof | ||
| } // end namespace llvm | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| //===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H | ||
| #define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H | ||
|
|
||
| #include "llvm/ADT/Twine.h" | ||
| #include "llvm/Support/Errc.h" | ||
| #include "llvm/Support/Error.h" | ||
| #include "llvm/Support/ErrorOr.h" | ||
| #include "llvm/Support/WithColor.h" | ||
| #include <system_error> | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| LLVM_ATTRIBUTE_NORETURN inline void | ||
| exitWithError(const Twine &Message, StringRef Whence = StringRef(), | ||
| StringRef Hint = StringRef()) { | ||
| WithColor::error(errs(), "llvm-profgen"); | ||
| if (!Whence.empty()) | ||
| errs() << Whence.str() << ": "; | ||
| errs() << Message << "\n"; | ||
| if (!Hint.empty()) | ||
| WithColor::note() << Hint.str() << "\n"; | ||
| ::exit(EXIT_FAILURE); | ||
| } | ||
|
|
||
| LLVM_ATTRIBUTE_NORETURN inline void | ||
| exitWithError(std::error_code EC, StringRef Whence = StringRef()) { | ||
| exitWithError(EC.message(), Whence); | ||
| } | ||
|
|
||
| LLVM_ATTRIBUTE_NORETURN inline void exitWithError(Error E, StringRef Whence) { | ||
| exitWithError(errorToErrorCode(std::move(E)), Whence); | ||
| } | ||
|
|
||
| template <typename T, typename... Ts> | ||
| T unwrapOrError(Expected<T> EO, Ts &&... Args) { | ||
| if (EO) | ||
| return std::move(*EO); | ||
| exitWithError(EO.takeError(), std::forward<Ts>(Args)...); | ||
| } | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| ;===- ./tools/llvm-profgen/LLVMBuild.txt ----------------------*- Conf -*--===; | ||
| ; | ||
| ; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| ; See https://llvm.org/LICENSE.txt for license information. | ||
| ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| ; | ||
| ;===------------------------------------------------------------------------===; | ||
| ; | ||
| ; This is an LLVMBuild description file for the components in this subdirectory. | ||
| ; | ||
| ; For more information on the LLVMBuild system, please see: | ||
| ; | ||
| ; http://llvm.org/docs/LLVMBuild.html | ||
| ; | ||
| ;===------------------------------------------------------------------------===; | ||
|
|
||
| [component_0] | ||
| type = Tool | ||
| name = llvm-profgen | ||
| parent = Tools | ||
| required_libraries = DebugInfoDWARF MC MCDisassembler MCParser Object all-targets Demangle Support |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| #include "PerfReader.h" | ||
|
|
||
| static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden, | ||
| cl::init(false), cl::ZeroOrMore, | ||
| cl::desc("Print binary load events.")); | ||
|
|
||
| namespace llvm { | ||
| namespace sampleprof { | ||
|
|
||
| PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames) { | ||
| // Load the binaries. | ||
| for (auto Filename : BinaryFilenames) | ||
| loadBinary(Filename, /*AllowNameConflict*/ false); | ||
| } | ||
|
|
||
| ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath, | ||
| bool AllowNameConflict) { | ||
| // The binary table is currently indexed by the binary name not the full | ||
| // binary path. This is because the user-given path may not match the one | ||
| // that was actually executed. | ||
| StringRef BinaryName = llvm::sys::path::filename(BinaryPath); | ||
|
|
||
| // Call to load the binary in the ctor of ProfiledBinary. | ||
| auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)}); | ||
|
|
||
| if (!Ret.second && !AllowNameConflict) { | ||
| std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() + | ||
| " and " + Ret.first->second.getPath().str() + " \n"; | ||
| exitWithError(ErrorMsg); | ||
| } | ||
|
|
||
| return Ret.first->second; | ||
| } | ||
|
|
||
| void PerfReader::updateBinaryAddress(const MMapEvent &Event) { | ||
| // Load the binary. | ||
| StringRef BinaryPath = Event.BinaryPath; | ||
| StringRef BinaryName = llvm::sys::path::filename(BinaryPath); | ||
|
|
||
| auto I = BinaryTable.find(BinaryName); | ||
| // Drop the event which doesn't belong to user-provided binaries | ||
| // or if its image is loaded at the same address | ||
| if (I == BinaryTable.end() || Event.BaseAddress == I->second.getBaseAddress()) | ||
| return; | ||
|
|
||
| ProfiledBinary &Binary = I->second; | ||
|
|
||
| // A binary image could be uploaded and then reloaded at different | ||
| // place, so update the address map here | ||
| AddrToBinaryMap.erase(Binary.getBaseAddress()); | ||
| AddrToBinaryMap[Event.BaseAddress] = &Binary; | ||
|
|
||
| // Update binary load address. | ||
| Binary.setBaseAddress(Event.BaseAddress); | ||
| } | ||
|
|
||
| void PerfReader::parseMMap2Event(TraceStream &TraceIt) { | ||
| // Parse a line like: | ||
| // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 | ||
| // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so | ||
| constexpr static const char *const Pattern = | ||
| "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " | ||
| "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " | ||
| "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; | ||
| // Field 0 - whole line | ||
| // Field 1 - PID | ||
| // Field 2 - base address | ||
| // Field 3 - mmapped size | ||
| // Field 4 - page offset | ||
| // Field 5 - binary path | ||
| enum EventIndex { | ||
| WHOLE_LINE = 0, | ||
| PID = 1, | ||
| BASE_ADDRESS = 2, | ||
| MMAPPED_SIZE = 3, | ||
| PAGE_OFFSET = 4, | ||
| BINARY_PATH = 5 | ||
| }; | ||
|
|
||
| Regex RegMmap2(Pattern); | ||
| SmallVector<StringRef, 6> Fields; | ||
| bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields); | ||
| if (!R) { | ||
| std::string ErrorMsg = "Cannot parse mmap event: Line" + | ||
| Twine(TraceIt.getLineNumber()).str() + ": " + | ||
| TraceIt.getCurrentLine().str() + " \n"; | ||
| exitWithError(ErrorMsg); | ||
| } | ||
| MMapEvent Event; | ||
| Fields[PID].getAsInteger(10, Event.PID); | ||
| Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress); | ||
| Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size); | ||
| Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset); | ||
| Event.BinaryPath = Fields[BINARY_PATH]; | ||
| updateBinaryAddress(Event); | ||
| if (ShowMmapEvents) { | ||
| outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at " | ||
| << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n"; | ||
| } | ||
| } | ||
|
|
||
| void PerfReader::parseEvent(TraceStream &TraceIt) { | ||
| if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2")) | ||
| parseMMap2Event(TraceIt); | ||
|
|
||
| TraceIt.advance(); | ||
| } | ||
|
|
||
| void PerfReader::parseTrace(StringRef Filename) { | ||
| // Trace line iterator | ||
| TraceStream TraceIt(Filename); | ||
| while (!TraceIt.isAtEoF()) { | ||
| parseEvent(TraceIt); | ||
| } | ||
| } | ||
|
|
||
| void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) { | ||
| // Parse perf traces. | ||
| for (auto Filename : PerfTraceFilenames) | ||
| parseTrace(Filename); | ||
| } | ||
|
|
||
| } // end namespace sampleprof | ||
| } // end namespace llvm |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H | ||
| #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H | ||
| #include "ErrorHandling.h" | ||
| #include "ProfiledBinary.h" | ||
| #include "llvm/Support/CommandLine.h" | ||
| #include "llvm/Support/Regex.h" | ||
| #include <fstream> | ||
| #include <list> | ||
| #include <map> | ||
| #include <vector> | ||
|
|
||
| using namespace llvm; | ||
| using namespace sampleprof; | ||
|
|
||
| namespace llvm { | ||
| namespace sampleprof { | ||
|
|
||
| // Stream based trace line iterator | ||
| class TraceStream { | ||
| std::string CurrentLine; | ||
| std::ifstream Fin; | ||
| bool IsAtEoF = false; | ||
| uint64_t LineNumber = 0; | ||
|
|
||
| public: | ||
| TraceStream(StringRef Filename) : Fin(Filename.str()) { | ||
| if (!Fin.good()) | ||
| exitWithError("Error read input perf script file", Filename); | ||
| advance(); | ||
| } | ||
|
|
||
| StringRef getCurrentLine() { | ||
| assert(!IsAtEoF && "Line iterator reaches the End-of-File!"); | ||
| return CurrentLine; | ||
| } | ||
|
|
||
| uint64_t getLineNumber() { return LineNumber; } | ||
|
|
||
| bool isAtEoF() { return IsAtEoF; } | ||
|
|
||
| // Read the next line | ||
| void advance() { | ||
| if (!std::getline(Fin, CurrentLine)) { | ||
| IsAtEoF = true; | ||
| return; | ||
| } | ||
| LineNumber++; | ||
| } | ||
| }; | ||
|
|
||
| // Filename to binary map | ||
| using BinaryMap = StringMap<ProfiledBinary>; | ||
| // Address to binary map for fast look-up | ||
| using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>; | ||
|
|
||
| // Load binaries and read perf trace to parse the events and samples | ||
| class PerfReader { | ||
|
|
||
| BinaryMap BinaryTable; | ||
| AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup. | ||
|
|
||
| // The parsed MMap event | ||
| struct MMapEvent { | ||
| pid_t PID = 0; | ||
| uint64_t BaseAddress = 0; | ||
| uint64_t Size = 0; | ||
| uint64_t Offset = 0; | ||
| StringRef BinaryPath; | ||
| }; | ||
|
|
||
| /// Load symbols and disassemble the code of a give binary. | ||
| /// Also register the binary in the binary table. | ||
| /// | ||
| ProfiledBinary &loadBinary(const StringRef BinaryPath, | ||
| bool AllowNameConflict = true); | ||
| void updateBinaryAddress(const MMapEvent &Event); | ||
|
|
||
| public: | ||
| PerfReader(cl::list<std::string> &BinaryFilenames); | ||
|
|
||
| /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a | ||
| /// mapping between the binary name and its memory layout. | ||
| /// | ||
| void parseMMap2Event(TraceStream &TraceIt); | ||
| void parseEvent(TraceStream &TraceIt); | ||
| // Parse perf events and samples | ||
| void parseTrace(StringRef Filename); | ||
| void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames); | ||
| }; | ||
|
|
||
| } // end namespace sampleprof | ||
| } // end namespace llvm | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,312 @@ | ||
| //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "ProfiledBinary.h" | ||
| #include "ErrorHandling.h" | ||
| #include "MCTargetDesc/X86MCTargetDesc.h" | ||
| #include "llvm/ADT/Triple.h" | ||
| #include "llvm/Demangle/Demangle.h" | ||
| #include "llvm/Support/CommandLine.h" | ||
| #include "llvm/Support/Format.h" | ||
| #include "llvm/Support/TargetRegistry.h" | ||
| #include "llvm/Support/TargetSelect.h" | ||
| #include "llvm/Support/X86TargetParser.h" | ||
|
|
||
| #define DEBUG_TYPE "load-binary" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden, | ||
| cl::init(false), cl::ZeroOrMore, | ||
| cl::desc("Print disassembled code.")); | ||
|
|
||
| static cl::opt<bool> ShowSourceLocations("show-source-locations", | ||
| cl::ReallyHidden, cl::init(false), | ||
| cl::ZeroOrMore, | ||
| cl::desc("Print source locations.")); | ||
|
|
||
| namespace llvm { | ||
| namespace sampleprof { | ||
|
|
||
| static const Target *getTarget(const ObjectFile *Obj) { | ||
| Triple TheTriple = Obj->makeTriple(); | ||
| std::string Error; | ||
| std::string ArchName; | ||
| const Target *TheTarget = | ||
| TargetRegistry::lookupTarget(ArchName, TheTriple, Error); | ||
| if (!TheTarget) | ||
| exitWithError(Error, Obj->getFileName()); | ||
| return TheTarget; | ||
| } | ||
|
|
||
| template <class ELFT> | ||
| static uint64_t getELFImageLMAForSec(const ELFFile<ELFT> *Obj, | ||
| const object::ELFSectionRef &Sec, | ||
| StringRef FileName) { | ||
| // Search for a PT_LOAD segment containing the requested section. Return this | ||
| // segment's p_addr as the image load address for the section. | ||
| const auto &PhdrRange = unwrapOrError(Obj->program_headers(), FileName); | ||
| for (const typename ELFT::Phdr &Phdr : PhdrRange) | ||
| if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_vaddr <= Sec.getAddress()) && | ||
| (Phdr.p_vaddr + Phdr.p_memsz > Sec.getAddress())) | ||
| // Segments will always be loaded at a page boundary. | ||
| return Phdr.p_paddr & ~(Phdr.p_align - 1U); | ||
| return 0; | ||
| } | ||
|
|
||
| // Get the image load address for a specific section. Note that an image is | ||
| // loaded by segments (a group of sections) and segments may not be consecutive | ||
| // in memory. | ||
| static uint64_t getELFImageLMAForSec(const object::ELFSectionRef &Sec) { | ||
| if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Sec.getObject())) | ||
| return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, | ||
| ELFObj->getFileName()); | ||
| else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Sec.getObject())) | ||
| return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, | ||
| ELFObj->getFileName()); | ||
| else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Sec.getObject())) | ||
| return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, | ||
| ELFObj->getFileName()); | ||
| const auto *ELFObj = cast<ELF64BEObjectFile>(Sec.getObject()); | ||
| return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, ELFObj->getFileName()); | ||
| } | ||
|
|
||
| void ProfiledBinary::load() { | ||
| // Attempt to open the binary. | ||
| OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path); | ||
| Binary &Binary = *OBinary.getBinary(); | ||
|
|
||
| auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary); | ||
| if (!Obj) | ||
| exitWithError("not a valid Elf image", Path); | ||
|
|
||
| TheTriple = Obj->makeTriple(); | ||
| // Current only support X86 | ||
| if (!TheTriple.isX86()) | ||
| exitWithError("unsupported target", TheTriple.getTriple()); | ||
| LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); | ||
|
|
||
| // Find the preferred base address for text sections. | ||
| setPreferredBaseAddress(Obj); | ||
|
|
||
| // Disassemble the text sections. | ||
| disassemble(Obj); | ||
|
|
||
| // TODO: decode other sections. | ||
|
|
||
| return; | ||
| } | ||
|
|
||
| void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) { | ||
| for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); | ||
| SI != SE; ++SI) { | ||
| const SectionRef &Section = *SI; | ||
| if (Section.isText()) { | ||
| PreferredBaseAddress = getELFImageLMAForSec(Section); | ||
| return; | ||
| } | ||
| } | ||
| exitWithError("no text section found", Obj->getFileName()); | ||
| } | ||
|
|
||
| bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | ||
| SectionSymbolsTy &Symbols, | ||
| const SectionRef &Section) { | ||
|
|
||
| std::size_t SE = Symbols.size(); | ||
| uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress; | ||
| uint64_t SectSize = Section.getSize(); | ||
| uint64_t StartOffset = Symbols[SI].Addr - PreferredBaseAddress; | ||
| uint64_t EndOffset = (SI + 1 < SE) | ||
| ? Symbols[SI + 1].Addr - PreferredBaseAddress | ||
| : SectionOffset + SectSize; | ||
| if (StartOffset >= EndOffset) | ||
| return true; | ||
|
|
||
| std::string &&SymbolName = Symbols[SI].Name.str(); | ||
| if (ShowDisassembly) | ||
| outs() << '<' << SymbolName << ">:\n"; | ||
|
|
||
| uint64_t Offset = StartOffset; | ||
| while (Offset < EndOffset) { | ||
| MCInst Inst; | ||
| uint64_t Size; | ||
| // Disassemble an instruction. | ||
| if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset), | ||
| Offset + PreferredBaseAddress, nulls())) | ||
| return false; | ||
|
|
||
| if (ShowDisassembly) { | ||
| outs() << format("%8" PRIx64 ":", Offset); | ||
| size_t Start = outs().tell(); | ||
| IP->printInst(&Inst, Offset + Size, "", *STI.get(), outs()); | ||
| if (ShowSourceLocations) { | ||
| unsigned Cur = outs().tell() - Start; | ||
| if (Cur < 40) | ||
| outs().indent(40 - Cur); | ||
| InstructionPointer Inst(this, Offset); | ||
| outs() << getReversedLocWithContext(symbolize(Inst)); | ||
| } | ||
| outs() << "\n"; | ||
| } | ||
|
|
||
| const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); | ||
|
|
||
| // Populate address maps. | ||
| CodeAddrs.push_back(Offset); | ||
| if (MCDesc.isCall()) | ||
| CallAddrs.insert(Offset); | ||
| else if (MCDesc.isReturn()) | ||
| RetAddrs.insert(Offset); | ||
|
|
||
| Offset += Size; | ||
| } | ||
|
|
||
| if (ShowDisassembly) | ||
| outs() << "\n"; | ||
|
|
||
| FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str(); | ||
| return true; | ||
| } | ||
|
|
||
| void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) { | ||
| const Target *TheTarget = getTarget(Obj); | ||
| std::string TripleName = TheTriple.getTriple(); | ||
| StringRef FileName = Obj->getFileName(); | ||
|
|
||
| MRI.reset(TheTarget->createMCRegInfo(TripleName)); | ||
| if (!MRI) | ||
| exitWithError("no register info for target " + TripleName, FileName); | ||
|
|
||
| MCTargetOptions MCOptions; | ||
| AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
| if (!AsmInfo) | ||
| exitWithError("no assembly info for target " + TripleName, FileName); | ||
|
|
||
| SubtargetFeatures Features = Obj->getFeatures(); | ||
| STI.reset( | ||
| TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString())); | ||
| if (!STI) | ||
| exitWithError("no subtarget info for target " + TripleName, FileName); | ||
|
|
||
| MII.reset(TheTarget->createMCInstrInfo()); | ||
| if (!MII) | ||
| exitWithError("no instruction info for target " + TripleName, FileName); | ||
|
|
||
| MCObjectFileInfo MOFI; | ||
| MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI); | ||
| MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx); | ||
| DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx)); | ||
| if (!DisAsm) | ||
| exitWithError("no disassembler for target " + TripleName, FileName); | ||
|
|
||
| MIA.reset(TheTarget->createMCInstrAnalysis(MII.get())); | ||
|
|
||
| int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); | ||
| IP.reset(TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant, | ||
| *AsmInfo, *MII, *MRI)); | ||
| IP->setPrintBranchImmAsAddress(true); | ||
| } | ||
|
|
||
| void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { | ||
| // Set up disassembler and related components. | ||
| setUpDisassembler(Obj); | ||
|
|
||
| // Create a mapping from virtual address to symbol name. The symbols in text | ||
| // sections are the candidates to dissassemble. | ||
| std::map<SectionRef, SectionSymbolsTy> AllSymbols; | ||
| StringRef FileName = Obj->getFileName(); | ||
| for (const SymbolRef &Symbol : Obj->symbols()) { | ||
| const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); | ||
| const StringRef Name = unwrapOrError(Symbol.getName(), FileName); | ||
| section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); | ||
| if (SecI != Obj->section_end()) | ||
| AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE)); | ||
| } | ||
|
|
||
| // Sort all the symbols. Use a stable sort to stabilize the output. | ||
| for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols) | ||
| stable_sort(SecSyms.second); | ||
|
|
||
| if (ShowDisassembly) | ||
| outs() << "\nDisassembly of " << FileName << ":\n"; | ||
|
|
||
| // Dissassemble a text section. | ||
| for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); | ||
| SI != SE; ++SI) { | ||
| const SectionRef &Section = *SI; | ||
| if (!Section.isText()) | ||
| continue; | ||
|
|
||
| uint64_t ImageLoadAddr = PreferredBaseAddress; | ||
| uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr; | ||
| uint64_t SectSize = Section.getSize(); | ||
| if (!SectSize) | ||
| continue; | ||
|
|
||
| // Register the text section. | ||
| TextSections.insert({SectionOffset, SectSize}); | ||
|
|
||
| if (ShowDisassembly) { | ||
| StringRef SectionName = unwrapOrError(Section.getName(), FileName); | ||
| outs() << "\nDisassembly of section " << SectionName; | ||
| outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", " | ||
| << format("0x%" PRIx64, SectionOffset + SectSize) << "]:\n\n"; | ||
| } | ||
|
|
||
| // Get the section data. | ||
| ArrayRef<uint8_t> Bytes = | ||
| arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName)); | ||
|
|
||
| // Get the list of all the symbols in this section. | ||
| SectionSymbolsTy &Symbols = AllSymbols[Section]; | ||
|
|
||
| // Disassemble symbol by symbol. | ||
| for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { | ||
| if (!dissassembleSymbol(SI, Bytes, Symbols, Section)) | ||
| exitWithError("disassembling error", FileName); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| void ProfiledBinary::setupSymbolizer() { | ||
| symbolize::LLVMSymbolizer::Options SymbolizerOpts; | ||
| SymbolizerOpts.PrintFunctions = | ||
| DILineInfoSpecifier::FunctionNameKind::LinkageName; | ||
| SymbolizerOpts.Demangle = false; | ||
| SymbolizerOpts.DefaultArch = TheTriple.getArchName().str(); | ||
| SymbolizerOpts.UseSymbolTable = false; | ||
| SymbolizerOpts.RelativeAddresses = false; | ||
| Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts); | ||
| } | ||
|
|
||
| FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP) { | ||
| assert(this == IP.Binary && | ||
| "Binary should only symbolize its own instruction"); | ||
| auto Addr = object::SectionedAddress{IP.Offset + PreferredBaseAddress, | ||
| object::SectionedAddress::UndefSection}; | ||
| DIInliningInfo InlineStack = | ||
| unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName()); | ||
|
|
||
| FrameLocationStack CallStack; | ||
|
|
||
| for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { | ||
| const auto &CallerFrame = InlineStack.getFrame(I); | ||
| if (CallerFrame.FunctionName == "<invalid>") | ||
| break; | ||
| LineLocation Line(CallerFrame.Line - CallerFrame.StartLine, | ||
| CallerFrame.Discriminator); | ||
| FrameLocation Callsite(CallerFrame.FunctionName, Line); | ||
| CallStack.push_back(Callsite); | ||
| } | ||
|
|
||
| return CallStack; | ||
| } | ||
|
|
||
| } // end namespace sampleprof | ||
| } // end namespace llvm |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H | ||
| #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H | ||
|
|
||
| #include "CallContext.h" | ||
| #include "llvm/ADT/StringRef.h" | ||
| #include "llvm/DebugInfo/Symbolize/Symbolize.h" | ||
| #include "llvm/MC/MCAsmInfo.h" | ||
| #include "llvm/MC/MCContext.h" | ||
| #include "llvm/MC/MCDisassembler/MCDisassembler.h" | ||
| #include "llvm/MC/MCInst.h" | ||
| #include "llvm/MC/MCInstPrinter.h" | ||
| #include "llvm/MC/MCInstrAnalysis.h" | ||
| #include "llvm/MC/MCInstrInfo.h" | ||
| #include "llvm/MC/MCObjectFileInfo.h" | ||
| #include "llvm/MC/MCRegisterInfo.h" | ||
| #include "llvm/MC/MCSubtargetInfo.h" | ||
| #include "llvm/MC/MCTargetOptions.h" | ||
| #include "llvm/Object/ELFObjectFile.h" | ||
| #include "llvm/Support/Path.h" | ||
| #include <set> | ||
| #include <string> | ||
| #include <unordered_map> | ||
| #include <unordered_set> | ||
| #include <vector> | ||
|
|
||
| using namespace llvm::object; | ||
|
|
||
| namespace llvm { | ||
| namespace sampleprof { | ||
|
|
||
class ProfiledBinary;

// Identifies one instruction of a profiled binary.
struct InstructionPointer {
  // The binary the instruction belongs to.
  ProfiledBinary *Binary;
  // Offset to the base address of the executable segment of the binary.
  uint64_t Offset;
  // Index to the sorted code address array of the binary. Starts at 0.
  uint64_t Index = 0;

  InstructionPointer(ProfiledBinary *Binary, uint64_t Offset)
      : Binary(Binary), Offset(Offset) {}
};
|
|
||
| class ProfiledBinary { | ||
| // Absolute path of the binary. | ||
| std::string Path; | ||
| // The target triple. | ||
| Triple TheTriple; | ||
| // The runtime base address that the executable sections are loaded at. | ||
| mutable uint64_t BaseAddress = 0; | ||
| // The preferred base address that the executable sections are loaded at. | ||
| uint64_t PreferredBaseAddress = 0; | ||
| // Mutiple MC component info | ||
| std::unique_ptr<const MCRegisterInfo> MRI; | ||
| std::unique_ptr<const MCAsmInfo> AsmInfo; | ||
| std::unique_ptr<const MCSubtargetInfo> STI; | ||
| std::unique_ptr<const MCInstrInfo> MII; | ||
| std::unique_ptr<MCDisassembler> DisAsm; | ||
| std::unique_ptr<const MCInstrAnalysis> MIA; | ||
| std::unique_ptr<MCInstPrinter> IP; | ||
| // A list of text sections sorted by start RVA and size. Used to check | ||
| // if a given RVA is a valid code address. | ||
| std::set<std::pair<uint64_t, uint64_t>> TextSections; | ||
| // Function offset to name mapping. | ||
| std::unordered_map<uint64_t, std::string> FuncStartAddrMap; | ||
| // An array of offsets of all instructions sorted in increasing order. The | ||
| // sorting is needed to fast advance to the next forward/backward instruction. | ||
| std::vector<uint64_t> CodeAddrs; | ||
| // A set of call instruction offsets. Used by virtual unwinding. | ||
| std::unordered_set<uint64_t> CallAddrs; | ||
| // A set of return instruction offsets. Used by virtual unwinding. | ||
| std::unordered_set<uint64_t> RetAddrs; | ||
|
|
||
| // The symbolizer used to get inline context for an instruction. | ||
| std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; | ||
|
|
||
| void setPreferredBaseAddress(const ELFObjectFileBase *O); | ||
|
|
||
| // Set up disassembler and related components. | ||
| void setUpDisassembler(const ELFObjectFileBase *Obj); | ||
| void setupSymbolizer(); | ||
|
|
||
| /// Dissassemble the text section and build various address maps. | ||
| void disassemble(const ELFObjectFileBase *O); | ||
|
|
||
| /// Helper function to dissassemble the symbol and extract info for unwinding | ||
| bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | ||
| SectionSymbolsTy &Symbols, const SectionRef &Section); | ||
| /// Symbolize a given instruction pointer and return a full call context. | ||
| FrameLocationStack symbolize(const InstructionPointer &I); | ||
|
|
||
| /// Decode the interesting parts of the binary and build internal data | ||
| /// structures. On high level, the parts of interest are: | ||
| /// 1. Text sections, including the main code section and the PLT | ||
| /// entries that will be used to handle cross-module call transitions. | ||
| /// 2. The .debug_line section, used by Dwarf-based profile generation. | ||
| /// 3. Pseudo probe related sections, used by probe-based profile | ||
| /// generation. | ||
| void load(); | ||
|
|
||
| public: | ||
| ProfiledBinary(StringRef Path) : Path(Path) { | ||
| setupSymbolizer(); | ||
| load(); | ||
| } | ||
|
|
||
| const StringRef getPath() const { return Path; } | ||
| const StringRef getName() const { return llvm::sys::path::filename(Path); } | ||
| uint64_t getBaseAddress() const { return BaseAddress; } | ||
| void setBaseAddress(uint64_t Address) { BaseAddress = Address; } | ||
| }; | ||
|
|
||
| } // end namespace sampleprof | ||
| } // end namespace llvm | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| //===- llvm-profgen.cpp - LLVM SPGO profile generation tool ---------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // llvm-profgen generates SPGO profiles from perf script output. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "ErrorHandling.h" | ||
| #include "PerfReader.h" | ||
| #include "ProfiledBinary.h" | ||
| #include "llvm/Support/CommandLine.h" | ||
| #include "llvm/Support/InitLLVM.h" | ||
| #include "llvm/Support/TargetSelect.h" | ||
|
|
||
| static cl::list<std::string> PerfTraceFilenames( | ||
| "perfscript", cl::value_desc("perfscript"), cl::OneOrMore, | ||
| llvm::cl::MiscFlags::CommaSeparated, | ||
| cl::desc("Path of perf-script trace created by Linux perf tool with " | ||
| "`script` command(the raw perf.data should be profiled with -b)")); | ||
|
|
||
| static cl::list<std::string> | ||
| BinaryFilenames("binary", cl::value_desc("binary"), cl::OneOrMore, | ||
| llvm::cl::MiscFlags::CommaSeparated, | ||
| cl::desc("Path of profiled binary files")); | ||
|
|
||
| static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), | ||
| cl::Required, | ||
| cl::desc("Output profile file")); | ||
|
|
||
| using namespace llvm; | ||
| using namespace sampleprof; | ||
|
|
||
| int main(int argc, const char *argv[]) { | ||
| InitLLVM X(argc, argv); | ||
|
|
||
| cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n"); | ||
|
|
||
| // Initialize targets and assembly printers/parsers. | ||
| InitializeAllTargetInfos(); | ||
| InitializeAllTargetMCs(); | ||
| InitializeAllDisassemblers(); | ||
|
|
||
| // Load binaries and parse perf events and samples | ||
| PerfReader Reader(BinaryFilenames); | ||
| Reader.parsePerfTraces(PerfTraceFilenames); | ||
|
|
||
| return EXIT_SUCCESS; | ||
| } |