From cbec9af6bfb0df5ff19e914f83cc744e0ff5e466 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 9 Aug 2019 11:26:27 +0000 Subject: [PATCH] [MCA] Add flag -show-encoding to llvm-mca. Flag -show-encoding enables the printing of instruction encodings as part of the instruction info view. Example (with flags -mtriple=x86_64-- -mcpu=btver2): Instruction Info: [1]: #uOps [2]: Latency [3]: RThroughput [4]: MayLoad [5]: MayStore [6]: HasSideEffects (U) [7]: Encoding Size [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: 1 2 1.00 4 c5 f0 59 d0 vmulps %xmm0, %xmm1, %xmm2 1 4 1.00 4 c5 eb 7c da vhaddps %xmm2, %xmm2, %xmm3 1 4 1.00 4 c5 e3 7c e3 vhaddps %xmm3, %xmm3, %xmm4 In this example, column Encoding Size is the size in bytes of the instruction encoding. Column Encodings reports the actual instruction encodings as byte sequences in hex (objdump style). The computation of encodings is done by a utility class named mca::CodeEmitter. In future, I plan to expose the CodeEmitter to the instruction builder, so that information about instruction encoding sizes can be used by the simulator. That would be a first step towards simulating the throughput from the decoders in the hardware frontend. 
Differential Revision: https://reviews.llvm.org/D65948 llvm-svn: 368432 --- llvm/docs/CommandGuide/llvm-mca.rst | 37 ++++++++- llvm/include/llvm/MCA/CodeEmitter.h | 72 +++++++++++++++++ llvm/lib/MCA/CMakeLists.txt | 1 + llvm/lib/MCA/CodeEmitter.cpp | 37 +++++++++ llvm/test/tools/llvm-mca/X86/show-encoding.s | 77 +++++++++++++++++++ .../llvm-mca/Views/InstructionInfoView.cpp | 31 ++++++-- .../llvm-mca/Views/InstructionInfoView.h | 13 +++- llvm/tools/llvm-mca/llvm-mca.cpp | 23 +++++- 8 files changed, 274 insertions(+), 17 deletions(-) create mode 100644 llvm/include/llvm/MCA/CodeEmitter.h create mode 100644 llvm/lib/MCA/CodeEmitter.cpp create mode 100644 llvm/test/tools/llvm-mca/X86/show-encoding.s diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index f2ebbec43c05f3..4f8704ad9a96d7 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -174,6 +174,10 @@ option specifies "``-``", then the output will also be sent to standard output. Enable the instruction info view. This is enabled by default. +.. option:: -show-encoding + + Enable the printing of instruction encodings within the instruction info view. + .. option:: -all-stats Print all hardware statistics. This enables extra statistics related to the @@ -415,10 +419,10 @@ an indicator of a performance bottleneck caused by the lack of hardware resources, and the *Resource pressure view* can help to identify the problematic resource usage. -The second section of the report shows the latency and reciprocal -throughput of every instruction in the sequence. That section also reports -extra information related to the number of micro opcodes, and opcode properties -(i.e., 'MayLoad', 'MayStore', and 'HasSideEffects'). +The second section of the report is the `instruction info view`. It shows the +latency and reciprocal throughput of every instruction in the sequence. 
It also +reports extra information related to the number of micro opcodes, and opcode +properties (i.e., 'MayLoad', 'MayStore', and 'HasSideEffects'). Field *RThroughput* is the reciprocal of the instruction throughput. Throughput is computed as the maximum number of instructions of a same type that can be @@ -427,6 +431,31 @@ example, the reciprocal throughput of a vector float multiply is 1 cycles/instruction. That is because the FP multiplier JFPM is only available from pipeline JFPU1. +Instruction encodings are displayed within the instruction info view when flag +`-show-encoding` is specified. + +Below is an example of `-show-encoding` output for the dot-product kernel: + +.. code-block:: none + + Instruction Info: + [1]: #uOps + [2]: Latency + [3]: RThroughput + [4]: MayLoad + [5]: MayStore + [6]: HasSideEffects (U) + [7]: Encoding Size + + [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: + 1 2 1.00 4 c5 f0 59 d0 vmulps %xmm0, %xmm1, %xmm2 + 1 4 1.00 4 c5 eb 7c da vhaddps %xmm2, %xmm2, %xmm3 + 1 4 1.00 4 c5 e3 7c e3 vhaddps %xmm3, %xmm3, %xmm4 + +The `Encoding Size` column shows the size in bytes of instructions. The +`Encodings` column shows the actual instruction encodings (byte sequences in +hex). + The third section is the *Resource pressure view*. This view reports the average number of resource cycles consumed every iteration by instructions for every processor resource unit available on the target. Information is diff --git a/llvm/include/llvm/MCA/CodeEmitter.h b/llvm/include/llvm/MCA/CodeEmitter.h new file mode 100644 index 00000000000000..c8d222bd8c2fdf --- /dev/null +++ b/llvm/include/llvm/MCA/CodeEmitter.h @@ -0,0 +1,72 @@ +//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A utility class used to compute instruction encodings. It buffers encodings +/// for later usage. It exposes a simple API to compute and get the encodings as +/// StringRef. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_CODEEMITTER_H +#define LLVM_MCA_CODEEMITTER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { +namespace mca { + +/// A utility class used to compute instruction encodings for a code region. +/// +/// It provides a simple API to compute and return instruction encodings as +/// strings. Encodings are cached internally for later usage. +class CodeEmitter { + const MCSubtargetInfo &STI; + const MCAsmBackend &MAB; + const MCCodeEmitter &MCE; + + SmallString<256> Code; + raw_svector_ostream VecOS; + ArrayRef Sequence; + + // An EncodingInfo pair stores information. Base (i.e. first) + // is an index to the `Code`. Length (i.e. second) is the encoding size. + using EncodingInfo = std::pair; + + // A cache of encodings. 
+ SmallVector Encodings; + + EncodingInfo getOrCreateEncodingInfo(unsigned MCID); + +public: + CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB, + const MCCodeEmitter &CE, ArrayRef S) + : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S), + Encodings(S.size()) {} + + StringRef getEncoding(unsigned MCID) { + EncodingInfo EI = getOrCreateEncodingInfo(MCID); + return StringRef(&Code[EI.first], EI.second); + } +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_CODEEMITTER_H diff --git a/llvm/lib/MCA/CMakeLists.txt b/llvm/lib/MCA/CMakeLists.txt index 4965b6b31c1200..e2eb06eb97c7e4 100644 --- a/llvm/lib/MCA/CMakeLists.txt +++ b/llvm/lib/MCA/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMCA + CodeEmitter.cpp Context.cpp HWEventListener.cpp HardwareUnits/HardwareUnit.cpp diff --git a/llvm/lib/MCA/CodeEmitter.cpp b/llvm/lib/MCA/CodeEmitter.cpp new file mode 100644 index 00000000000000..294107219cb005 --- /dev/null +++ b/llvm/lib/MCA/CodeEmitter.cpp @@ -0,0 +1,37 @@ +//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CodeEmitter API. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/CodeEmitter.h" + +namespace llvm { +namespace mca { + +CodeEmitter::EncodingInfo +CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) { + EncodingInfo &EI = Encodings[MCID]; + if (EI.second) + return EI; + + SmallVector Fixups; + const MCInst &Inst = Sequence[MCID]; + MCInst Relaxed(Sequence[MCID]); + if (MAB.mayNeedRelaxation(Inst, STI)) + MAB.relaxInstruction(Inst, STI, Relaxed); + + EI.first = Code.size(); + MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI); + EI.second = Code.size() - EI.first; + return EI; +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/test/tools/llvm-mca/X86/show-encoding.s b/llvm/test/tools/llvm-mca/X86/show-encoding.s new file mode 100644 index 00000000000000..1a81bdf6fb8352 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/show-encoding.s @@ -0,0 +1,77 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info < %s | FileCheck %s --check-prefix=NORMAL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info -show-encoding=false < %s | FileCheck %s --check-prefix=NORMAL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info -show-encoding < %s | FileCheck %s --check-prefix=WITHENCODINGS + + movq 0x170(%rbp), %r10 + lea (%r8,%r8,2), %r9d + movsx %r9d, %r9 + inc %r8d + movq 0x178(%rbp), %r11 + vmovups (%r10,%r9,4), %xmm3 + vpslldq $0x4, %xmm3, %xmm2 + vpslldq $0x4, %xmm3, %xmm4 + vaddps %xmm2, %xmm3, %xmm6 + vpslldq $0xc, %xmm3, %xmm5 + vaddps %xmm4, %xmm5, %xmm7 + vaddps %xmm6, %xmm7, %xmm8 + vaddps %xmm8, %xmm0, %xmm9 + vshufps $0xff, %xmm9, %xmm9, %xmm0 + vmovups %xmm9, (%r11,%r9,4) + cmp %r8d, %esi + jl -90 + +# NORMAL: Instruction Info: +# NORMAL-NEXT: [1]: #uOps +# NORMAL-NEXT: [2]: Latency +# NORMAL-NEXT: [3]: 
RThroughput +# NORMAL-NEXT: [4]: MayLoad +# NORMAL-NEXT: [5]: MayStore +# NORMAL-NEXT: [6]: HasSideEffects (U) + +# WITHENCODINGS: Instruction Info: +# WITHENCODINGS-NEXT: [1]: #uOps +# WITHENCODINGS-NEXT: [2]: Latency +# WITHENCODINGS-NEXT: [3]: RThroughput +# WITHENCODINGS-NEXT: [4]: MayLoad +# WITHENCODINGS-NEXT: [5]: MayStore +# WITHENCODINGS-NEXT: [6]: HasSideEffects (U) +# WITHENCODINGS-NEXT: [7]: Encoding Size + +# NORMAL: [1] [2] [3] [4] [5] [6] Instructions: +# NORMAL-NEXT: 1 3 1.00 * movq 368(%rbp), %r10 +# NORMAL-NEXT: 1 2 1.00 leal (%r8,%r8,2), %r9d +# NORMAL-NEXT: 1 1 0.50 movslq %r9d, %r9 +# NORMAL-NEXT: 1 1 0.50 incl %r8d +# NORMAL-NEXT: 1 3 1.00 * movq 376(%rbp), %r11 +# NORMAL-NEXT: 1 5 1.00 * vmovups (%r10,%r9,4), %xmm3 +# NORMAL-NEXT: 1 1 0.50 vpslldq $4, %xmm3, %xmm2 +# NORMAL-NEXT: 1 1 0.50 vpslldq $4, %xmm3, %xmm4 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm2, %xmm3, %xmm6 +# NORMAL-NEXT: 1 1 0.50 vpslldq $12, %xmm3, %xmm5 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm7 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm6, %xmm7, %xmm8 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm8, %xmm0, %xmm9 +# NORMAL-NEXT: 1 1 0.50 vshufps $255, %xmm9, %xmm9, %xmm0 +# NORMAL-NEXT: 1 1 1.00 * vmovups %xmm9, (%r11,%r9,4) +# NORMAL-NEXT: 1 1 0.50 cmpl %r8d, %esi +# NORMAL-NEXT: 1 1 0.50 jl -90 + +# WITHENCODINGS: [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: +# WITHENCODINGS-NEXT: 1 3 1.00 * 7 4c 8b 95 70 01 00 00 movq 368(%rbp), %r10 +# WITHENCODINGS-NEXT: 1 2 1.00 4 47 8d 0c 40 leal (%r8,%r8,2), %r9d +# WITHENCODINGS-NEXT: 1 1 0.50 3 4d 63 c9 movslq %r9d, %r9 +# WITHENCODINGS-NEXT: 1 1 0.50 3 41 ff c0 incl %r8d +# WITHENCODINGS-NEXT: 1 3 1.00 * 7 4c 8b 9d 78 01 00 00 movq 376(%rbp), %r11 +# WITHENCODINGS-NEXT: 1 5 1.00 * 6 c4 81 78 10 1c 8a vmovups (%r10,%r9,4), %xmm3 +# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 e9 73 fb 04 vpslldq $4, %xmm3, %xmm2 +# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 d9 73 fb 04 vpslldq $4, %xmm3, %xmm4 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 e0 58 f2 vaddps %xmm2, 
%xmm3, %xmm6 +# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 d1 73 fb 0c vpslldq $12, %xmm3, %xmm5 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 d0 58 fc vaddps %xmm4, %xmm5, %xmm7 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 40 58 c6 vaddps %xmm6, %xmm7, %xmm8 +# WITHENCODINGS-NEXT: 1 3 1.00 5 c4 41 78 58 c8 vaddps %xmm8, %xmm0, %xmm9 +# WITHENCODINGS-NEXT: 1 1 0.50 6 c4 c1 30 c6 c1 ff vshufps $255, %xmm9, %xmm9, %xmm0 +# WITHENCODINGS-NEXT: 1 1 1.00 * 6 c4 01 78 11 0c 8b vmovups %xmm9, (%r11,%r9,4) +# WITHENCODINGS-NEXT: 1 1 0.50 3 44 39 c6 cmpl %r8d, %esi +# WITHENCODINGS-NEXT: 1 1 0.50 6 0f 8c 00 00 00 00 jl -90 diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp index 1fbffa3e5b6979..a6f9153b4945cf 100644 --- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" namespace llvm { namespace mca { @@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const { TempStream << "\n\nInstruction Info:\n"; TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" - << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } - TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; - for (const MCInst &Inst : Source) { + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); // Obtain the scheduling class information from the instruction. 
@@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const { } TempStream << (MCDesc.mayLoad() ? " * " : " "); TempStream << (MCDesc.mayStore() ? " * " : " "); - TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } MCIP.printInst(&Inst, InstrStream, "", STI); InstrStream.flush(); @@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const { // Consume any tabs or spaces at the beginning of the string. StringRef Str(Instruction); Str = Str.ltrim(); - TempStream << " " << Str << '\n'; + TempStream << Str << '\n'; Instruction = ""; } diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h index 640d87383436ef..0e948304119fdb 100644 --- a/llvm/tools/llvm-mca/Views/InstructionInfoView.h +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" @@ -51,14 +52,18 @@ namespace mca { class InstructionInfoView : public View { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; llvm::ArrayRef Source; llvm::MCInstPrinter &MCIP; public: - InstructionInfoView(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - llvm::ArrayRef S, llvm::MCInstPrinter &IP) - : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + InstructionInfoView(const 
llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} void printView(llvm::raw_ostream &OS) const override; }; diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 63a748c3212cd5..1bcf850193e6a6 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -32,12 +32,15 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/MCA/Context.h" #include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" @@ -200,6 +203,11 @@ static cl::opt EnableBottleneckAnalysis( cl::desc("Enable bottleneck analysis (disabled by default)"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt ShowEncoding( + "show-encoding", + cl::desc("Print encoding information in the instruction info view"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -424,6 +432,12 @@ int main(int argc, char **argv) { // Number each region in the sequence. unsigned RegionIdx = 0; + std::unique_ptr MCE( + TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB(TheTarget->createMCAsmBackend( + *STI, *MRI, InitMCTargetOptionsFromFlags())); + for (const std::unique_ptr &Region : Regions) { // Skip empty code regions. if (Region->empty()) @@ -441,6 +455,7 @@ int main(int argc, char **argv) { // Lower the MCInst sequence into an mca::Instruction sequence. 
ArrayRef Insts = Region->getInstructions(); + mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); std::vector> LoweredSequence; for (const MCInst &MCI : Insts) { Expected> Inst = @@ -478,7 +493,7 @@ int main(int argc, char **argv) { // Create the views for this pipeline, execute, and emit a report. if (PrintInstructionInfoView) { Printer.addView(llvm::make_unique( - *STI, *MCII, Insts, *IP)); + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); } Printer.addView( llvm::make_unique(*STI, *IP, Insts)); @@ -504,8 +519,8 @@ int main(int argc, char **argv) { } if (PrintInstructionInfoView) - Printer.addView( - llvm::make_unique(*STI, *MCII, Insts, *IP)); + Printer.addView(llvm::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); if (PrintDispatchStats) Printer.addView(llvm::make_unique());