| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s | ||
|
|
||
| vmovaps (%rsi), %xmm0 | ||
| vmovaps %xmm0, (%rdi) | ||
| vmovaps 16(%rsi), %xmm0 | ||
| vmovaps %xmm0, 16(%rdi) | ||
| vmovaps 32(%rsi), %xmm0 | ||
| vmovaps %xmm0, 32(%rdi) | ||
| vmovaps 48(%rsi), %xmm0 | ||
| vmovaps %xmm0, 48(%rdi) | ||
|
|
||
|
|
||
| # CHECK: Iterations: 100 | ||
| # CHECK-NEXT: Instructions: 800 | ||
| # CHECK-NEXT: Total Cycles: 408 | ||
| # CHECK-NEXT: Dispatch Width: 2 | ||
| # CHECK-NEXT: IPC: 1.96 | ||
|
|
||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - JALU0 | ||
| # CHECK-NEXT: [1] - JALU1 | ||
| # CHECK-NEXT: [2] - JDiv | ||
| # CHECK-NEXT: [3] - JFPA | ||
| # CHECK-NEXT: [4] - JFPM | ||
| # CHECK-NEXT: [5] - JFPU0 | ||
| # CHECK-NEXT: [6] - JFPU1 | ||
| # CHECK-NEXT: [7] - JLAGU | ||
| # CHECK-NEXT: [8] - JMul | ||
| # CHECK-NEXT: [9] - JSAGU | ||
| # CHECK-NEXT: [10] - JSTC | ||
| # CHECK-NEXT: [11] - JVALU0 | ||
| # CHECK-NEXT: [12] - JVALU1 | ||
| # CHECK-NEXT: [13] - JVIMUL | ||
|
|
||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] | ||
| # CHECK-NEXT: - - - - - - - 4.00 - 4.00 - - - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: | ||
| # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovaps %xmm0, 48(%rdi) | ||
|
|
||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) | ||
|
|
||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeeeeER .. vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,1] D=====eER .. vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: [0,2] .DeeeeeER .. vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,3] .D=====eER.. vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: [0,4] . DeeeeeER.. vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,5] . D=====eER. vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: [0,6] . DeeeeeER. vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,7] . D=====eER vmovaps %xmm0, 48(%rdi) | ||
|
|
||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: 3. 1 6.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: 4. 1 1.0 1.0 0.0 vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 < %s | FileCheck %s | ||
|
|
||
| add %edi, %eax | ||
|
|
||
| # CHECK: Iterations: 100 | ||
| # CHECK-NEXT: Instructions: 100 | ||
| # CHECK-NEXT: Total Cycles: 103 | ||
| # CHECK-NEXT: Dispatch Width: 2 | ||
| # CHECK-NEXT: IPC: 0.97 | ||
|
|
||
| # CHECK-LABEL: Resources: | ||
| # CHECK-NEXT: [0] - JALU0 | ||
| # CHECK-NEXT: [1] - JALU1 | ||
| # CHECK-NEXT: [2] - JDiv | ||
| # CHECK-NEXT: [3] - JFPA | ||
| # CHECK-NEXT: [4] - JFPM | ||
| # CHECK-NEXT: [5] - JFPU0 | ||
| # CHECK-NEXT: [6] - JFPU1 | ||
| # CHECK-NEXT: [7] - JLAGU | ||
| # CHECK-NEXT: [8] - JMul | ||
| # CHECK-NEXT: [9] - JSAGU | ||
| # CHECK-NEXT: [10] - JSTC | ||
| # CHECK-NEXT: [11] - JVALU0 | ||
| # CHECK-NEXT: [12] - JVALU1 | ||
| # CHECK-NEXT: [13] - JVIMUL | ||
|
|
||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] | ||
| # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: | ||
| # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 0.50 addl %edi, %eax |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
# Every RUN line feeds this file to llvm-mca on stdin for one CPU model. The
# ALL prefix checks output shared by all models; the per-CPU prefix checks the
# dispatch width reported for that model.
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=knl < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=slm < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s
|
|
||
| add %edi, %eax | ||
|
|
||
| # ALL: Iterations: 70 | ||
| # ALL-NEXT: Instructions: 70 | ||
|
|
||
| # BTVER2: Dispatch Width: 2 | ||
| # ZNVER1: Dispatch Width: 4 | ||
| # SANDYBRIDGE: Dispatch Width: 4 | ||
| # IVYBRIDGE: Dispatch Width: 4 | ||
| # HASWELL: Dispatch Width: 4 | ||
| # BROADWELL: Dispatch Width: 4 | ||
| # KNL: Dispatch Width: 4 | ||
| # SKX: Dispatch Width: 6 | ||
| # SKX-AVX512: Dispatch Width: 6 | ||
| # SLM: Dispatch Width: 2 | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 < %s 2>&1 | FileCheck --check-prefix=CUSTOM %s | ||
|
|
||
| add %eax, %eax | ||
|
|
||
| # DEFAULT: Iterations: 70 | ||
| # DEFAULT-NEXT: Instructions: 70 | ||
|
|
||
| # CUSTOM: Iterations: 1 | ||
| # CUSTOM-NEXT: Instructions: 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch=0 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch=1 < %s 2>&1 | FileCheck --check-prefix=CUSTOM %s | ||
|
|
||
| add %eax, %eax | ||
|
|
||
| # DEFAULT: Dispatch Width: 2 | ||
| # CUSTOM: Dispatch Width: 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # RUN: not llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o %t1 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: error: please specify an out-of-order cpu. 'atom' is an in-order cpu. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # RUN: not llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 %s | ||
|
|
||
| invalid_instruction_mnemonic |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # RUN: not llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=foo -o %t1 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: 'foo' is not a recognized processor for this target (ignoring processor) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # RUN: not llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -o %t1 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: error: no assembly instructions found. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
# Mark every test in this directory as unsupported when 'X86' is not among
# the targets listed in the root lit configuration.
if 'X86' not in config.root.targets:
    config.unsupported = True
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # RUN: not llvm-mca -mtriple=x86_64-unknown-unknown < %s 2>&1 | FileCheck %s | ||
| # RUN: not llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=generic < %s 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: error: unable to find instruction-level scheduling information for target triple 'x86_64-unknown-unknown' and cpu 'generic'. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # RUN: not llvm-mca %t.blah -o %t2 2>&1 | FileCheck --check-prefix=ENOENT %s | ||
|
|
||
| # ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
# These tests need a non-empty default triple; without the 'default_triple'
# feature the whole directory is marked as unsupported.
has_default_triple = 'default_triple' in config.available_features
if not has_default_triple:
    config.unsupported = True
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ subdirectories = | |
| llvm-link | ||
| llvm-lto | ||
| llvm-mc | ||
| llvm-mca | ||
| llvm-mcmarkup | ||
| llvm-modextract | ||
| llvm-mt | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| //===--------------------- Backend.cpp --------------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// Implementation of class Backend, which emulates a hardware OoO backend. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
#include "Backend.h"
#include "HWEventListener.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include <cassert>
|
|
||
| namespace mca { | ||
|
|
||
| #define DEBUG_TYPE "llvm-mca" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| void Backend::addEventListener(HWEventListener *Listener) { | ||
| if (Listener) | ||
| Listeners.insert(Listener); | ||
| } | ||
|
|
||
| void Backend::runCycle(unsigned Cycle) { | ||
| notifyCycleBegin(Cycle); | ||
|
|
||
| if (!SM->hasNext()) { | ||
| notifyCycleEnd(Cycle); | ||
| return; | ||
| } | ||
|
|
||
| InstRef IR = SM->peekNext(); | ||
| const InstrDesc *Desc = &IB->getOrCreateInstrDesc(STI, *IR.second); | ||
| while (DU->isAvailable(Desc->NumMicroOps) && DU->canDispatch(*Desc)) { | ||
| Instruction *NewIS = IB->createInstruction(STI, *DU, IR.first, *IR.second); | ||
| Instructions[IR.first] = std::unique_ptr<Instruction>(NewIS); | ||
| NewIS->setRCUTokenID(DU->dispatch(IR.first, NewIS)); | ||
|
|
||
| // If this is a zero latency instruction, then we don't need to dispatch | ||
| // it. Instead, we can mark it as executed. | ||
| if (NewIS->isZeroLatency()) | ||
| notifyInstructionExecuted(IR.first); | ||
|
|
||
| // Check if we have dispatched all the instructions. | ||
| SM->updateNext(); | ||
| if (!SM->hasNext()) | ||
| break; | ||
|
|
||
| // Prepare for the next round. | ||
| IR = SM->peekNext(); | ||
| Desc = &IB->getOrCreateInstrDesc(STI, *IR.second); | ||
| } | ||
|
|
||
| notifyCycleEnd(Cycle); | ||
| } | ||
|
|
||
| void Backend::notifyCycleBegin(unsigned Cycle) { | ||
| DEBUG(dbgs() << "[E] Cycle begin: " << Cycle << '\n'); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onCycleBegin(Cycle); | ||
|
|
||
| DU->cycleEvent(Cycle); | ||
| HWS->cycleEvent(Cycle); | ||
| } | ||
|
|
||
| void Backend::notifyInstructionDispatched(unsigned Index) { | ||
| DEBUG(dbgs() << "[E] Instruction Dispatched: " << Index << '\n'); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onInstructionDispatched(Index); | ||
| } | ||
|
|
||
| void Backend::notifyInstructionReady(unsigned Index) { | ||
| DEBUG(dbgs() << "[E] Instruction Ready: " << Index << '\n'); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onInstructionReady(Index); | ||
| } | ||
|
|
||
| void Backend::notifyInstructionIssued( | ||
| unsigned Index, const ArrayRef<std::pair<ResourceRef, unsigned>> &Used) { | ||
| DEBUG( | ||
| dbgs() << "[E] Instruction Issued: " << Index << '\n'; | ||
| for (const std::pair<ResourceRef, unsigned> &Resource : Used) { | ||
| dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' | ||
| << Resource.first.second << "]\n"; | ||
| dbgs() << " cycles: " << Resource.second << '\n'; | ||
| } | ||
| ); | ||
|
|
||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onInstructionIssued(Index, Used); | ||
| } | ||
|
|
||
| void Backend::notifyInstructionExecuted(unsigned Index) { | ||
| DEBUG(dbgs() << "[E] Instruction Executed: " << Index << '\n'); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onInstructionExecuted(Index); | ||
|
|
||
| const Instruction &IS = *Instructions[Index]; | ||
| DU->onInstructionExecuted(IS.getRCUTokenID()); | ||
| } | ||
|
|
||
| void Backend::notifyInstructionRetired(unsigned Index) { | ||
| DEBUG(dbgs() << "[E] Instruction Retired: " << Index << '\n'); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onInstructionRetired(Index); | ||
|
|
||
| const Instruction &IS = *Instructions[Index]; | ||
| DU->invalidateRegisterMappings(IS); | ||
| Instructions.erase(Index); | ||
| } | ||
|
|
||
| void Backend::notifyResourceAvailable(const ResourceRef &RR) { | ||
| DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' << RR.second | ||
| << "]\n"); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onResourceAvailable(RR); | ||
| } | ||
|
|
||
| void Backend::notifyCycleEnd(unsigned Cycle) { | ||
| DEBUG(dbgs() << "[E] Cycle end: " << Cycle << "\n\n"); | ||
| for (HWEventListener *Listener : Listeners) | ||
| Listener->onCycleEnd(Cycle); | ||
| } | ||
|
|
||
| } // namespace mca. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| //===--------------------- Backend.h ----------------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements an OoO backend for the llvm-mca tool. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_BACKEND_H | ||
| #define LLVM_TOOLS_LLVM_MCA_BACKEND_H | ||
|
|
||
| #include "Dispatch.h" | ||
| #include "InstrBuilder.h" | ||
| #include "Scheduler.h" | ||
| #include "SourceMgr.h" | ||
|
|
||
| namespace mca { | ||
|
|
||
| struct HWEventListener; | ||
|
|
||
| /// \brief An out of order backend for a specific subtarget. | ||
| /// | ||
| /// It emulates an out-of-order execution of instructions. Instructions are | ||
| /// fetched from a MCInst sequence managed by an object of class SourceMgr. | ||
| /// Instructions are first dispatched to the schedulers and then executed. | ||
| /// This class tracks the lifetime of an instruction from the moment where | ||
| /// it gets dispatched to the schedulers, to the moment where it finishes | ||
| /// executing and register writes are architecturally committed. | ||
| /// In particular, it monitors changes in the state of every instruction | ||
| /// in flight. | ||
| /// Instructions are executed in a loop of iterations. The number of iterations | ||
| /// is defined by the SourceMgr object. | ||
| /// The Backend entry point is method 'run()', which executes cycles in a loop | ||
| /// for as long as there are new instructions to dispatch or there are | ||
| /// instructions that have not been retired yet. | ||
| /// Internally, the Backend collects statistical information in the form of | ||
| /// histograms. For example, it tracks how the dispatch group size changes | ||
| /// over time. | ||
| class Backend { | ||
| const llvm::MCSubtargetInfo &STI; | ||
|
|
||
| std::unique_ptr<InstrBuilder> IB; | ||
| std::unique_ptr<Scheduler> HWS; | ||
| std::unique_ptr<DispatchUnit> DU; | ||
| std::unique_ptr<SourceMgr> SM; | ||
| unsigned Cycles; | ||
|
|
||
| llvm::DenseMap<unsigned, std::unique_ptr<Instruction>> Instructions; | ||
| std::set<HWEventListener *> Listeners; | ||
|
|
||
| void runCycle(unsigned Cycle); | ||
|
|
||
| public: | ||
| Backend(const llvm::MCSubtargetInfo &Subtarget, const llvm::MCInstrInfo &MCII, | ||
| const llvm::MCRegisterInfo &MRI, std::unique_ptr<SourceMgr> Source, | ||
| unsigned DispatchWidth = 0, unsigned RegisterFileSize = 0, | ||
| unsigned MaxRetirePerCycle = 0, unsigned LoadQueueSize = 0, | ||
| unsigned StoreQueueSize = 0, bool AssumeNoAlias = false) | ||
| : STI(Subtarget), | ||
| HWS(llvm::make_unique<Scheduler>(this, Subtarget.getSchedModel(), | ||
| LoadQueueSize, StoreQueueSize, | ||
| AssumeNoAlias)), | ||
| DU(llvm::make_unique<DispatchUnit>( | ||
| this, MRI, Subtarget.getSchedModel().MicroOpBufferSize, | ||
| RegisterFileSize, MaxRetirePerCycle, DispatchWidth, HWS.get())), | ||
| SM(std::move(Source)), Cycles(0) { | ||
| IB = llvm::make_unique<InstrBuilder>(MCII, getProcResourceMasks()); | ||
| } | ||
|
|
||
| void run() { | ||
| while (SM->hasNext() || !DU->isRCUEmpty()) | ||
| runCycle(Cycles++); | ||
| } | ||
|
|
||
| unsigned getNumIterations() const { return SM->getNumIterations(); } | ||
| unsigned getNumInstructions() const { return SM->size(); } | ||
| unsigned getNumCycles() const { return Cycles; } | ||
| unsigned getTotalRegisterMappingsCreated() const { | ||
| return DU->getTotalRegisterMappingsCreated(); | ||
| } | ||
| unsigned getMaxUsedRegisterMappings() const { | ||
| return DU->getMaxUsedRegisterMappings(); | ||
| } | ||
| unsigned getDispatchWidth() const { return DU->getDispatchWidth(); } | ||
|
|
||
| const llvm::MCSubtargetInfo &getSTI() const { return STI; } | ||
| const llvm::MCSchedModel &getSchedModel() const { | ||
| return STI.getSchedModel(); | ||
| } | ||
| const llvm::ArrayRef<uint64_t> getProcResourceMasks() const { | ||
| return HWS->getProcResourceMasks(); | ||
| } | ||
|
|
||
| double getRThroughput(const InstrDesc &ID) const { | ||
| return HWS->getRThroughput(ID); | ||
| } | ||
| void getBuffersUsage(std::vector<BufferUsageEntry> &Usage) const { | ||
| return HWS->getBuffersUsage(Usage); | ||
| } | ||
|
|
||
| unsigned getNumRATStalls() const { return DU->getNumRATStalls(); } | ||
| unsigned getNumRCUStalls() const { return DU->getNumRCUStalls(); } | ||
| unsigned getNumSQStalls() const { return DU->getNumSQStalls(); } | ||
| unsigned getNumLDQStalls() const { return DU->getNumLDQStalls(); } | ||
| unsigned getNumSTQStalls() const { return DU->getNumSTQStalls(); } | ||
| unsigned getNumDispatchGroupStalls() const { | ||
| return DU->getNumDispatchGroupStalls(); | ||
| } | ||
|
|
||
| const llvm::MCInst &getMCInstFromIndex(unsigned Index) const { | ||
| return SM->getMCInstFromIndex(Index); | ||
| } | ||
|
|
||
| const InstrDesc &getInstrDesc(const llvm::MCInst &Inst) const { | ||
| return IB->getOrCreateInstrDesc(STI, Inst); | ||
| } | ||
|
|
||
| const SourceMgr &getSourceMgr() const { return *SM; } | ||
|
|
||
| void addEventListener(HWEventListener *Listener); | ||
| void notifyCycleBegin(unsigned Cycle); | ||
| void notifyInstructionDispatched(unsigned Index); | ||
| void notifyInstructionReady(unsigned Index); | ||
| void notifyInstructionIssued( | ||
| unsigned Index, | ||
| const llvm::ArrayRef<std::pair<ResourceRef, unsigned>> &Used); | ||
| void notifyInstructionExecuted(unsigned Index); | ||
| void notifyResourceAvailable(const ResourceRef &RR); | ||
| void notifyInstructionRetired(unsigned Index); | ||
| void notifyCycleEnd(unsigned Cycle); | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| //===--------------------- BackendPrinter.cpp -------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements the BackendPrinter interface. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "BackendPrinter.h" | ||
| #include "llvm/CodeGen/TargetSchedule.h" | ||
|
|
||
| namespace mca { | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| std::unique_ptr<ToolOutputFile> | ||
| BackendPrinter::getOutputStream(std::string OutputFile) { | ||
| if (OutputFile == "") | ||
| OutputFile = "-"; | ||
| std::error_code EC; | ||
| auto Out = llvm::make_unique<ToolOutputFile>(OutputFile, EC, sys::fs::F_None); | ||
| if (!EC) | ||
| return Out; | ||
| errs() << EC.message() << '\n'; | ||
| return nullptr; | ||
| } | ||
|
|
||
| void BackendPrinter::printGeneralStatistics(unsigned Iterations, | ||
| unsigned Cycles, | ||
| unsigned Instructions, | ||
| unsigned DispatchWidth) const { | ||
| unsigned TotalInstructions = Instructions * Iterations; | ||
| double IPC = (double)TotalInstructions / Cycles; | ||
|
|
||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "Iterations: " << Iterations; | ||
| TempStream << "\nInstructions: " << TotalInstructions; | ||
| TempStream << "\nTotal Cycles: " << Cycles; | ||
| TempStream << "\nDispatch Width: " << DispatchWidth; | ||
| TempStream << "\nIPC: " << format("%.2f", IPC) << '\n'; | ||
| TempStream.flush(); | ||
| File->os() << Buffer; | ||
| } | ||
|
|
||
| void BackendPrinter::printRATStatistics(unsigned TotalMappings, | ||
| unsigned MaxUsedMappings) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nRegister Alias Table:"; | ||
| TempStream << "\nTotal number of mappings created: " << TotalMappings; | ||
| TempStream << "\nMax number of mappings used: " << MaxUsedMappings | ||
| << '\n'; | ||
| TempStream.flush(); | ||
| File->os() << Buffer; | ||
| } | ||
|
|
||
| void BackendPrinter::printDispatchStalls(unsigned RATStalls, unsigned RCUStalls, | ||
| unsigned SCHEDQStalls, | ||
| unsigned LDQStalls, unsigned STQStalls, | ||
| unsigned DGStalls) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nDynamic Dispatch Stall Cycles:\n"; | ||
| TempStream << "RAT - Register unavailable: " | ||
| << RATStalls; | ||
| TempStream << "\nRCU - Retire tokens unavailable: " | ||
| << RCUStalls; | ||
| TempStream << "\nSCHEDQ - Scheduler full: " | ||
| << SCHEDQStalls; | ||
| TempStream << "\nLQ - Load queue full: " | ||
| << LDQStalls; | ||
| TempStream << "\nSQ - Store queue full: " | ||
| << STQStalls; | ||
| TempStream << "\nGROUP - Static restrictions on the dispatch group: " | ||
| << DGStalls; | ||
| TempStream << '\n'; | ||
| TempStream.flush(); | ||
| File->os() << Buffer; | ||
| } | ||
|
|
||
| void BackendPrinter::printSchedulerUsage( | ||
| const MCSchedModel &SM, const ArrayRef<BufferUsageEntry> &Usage) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nScheduler's queue usage:\n"; | ||
| const ArrayRef<uint64_t> ResourceMasks = B.getProcResourceMasks(); | ||
| for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { | ||
| const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); | ||
| if (!ProcResource.BufferSize) | ||
| continue; | ||
|
|
||
| for (const BufferUsageEntry &Entry : Usage) | ||
| if (ResourceMasks[I] == Entry.first) | ||
| TempStream << ProcResource.Name << ", " << Entry.second << '/' | ||
| << ProcResource.BufferSize << '\n'; | ||
| } | ||
|
|
||
| TempStream.flush(); | ||
| File->os() << Buffer; | ||
| } | ||
|
|
||
| void BackendPrinter::printInstructionInfo() const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
|
|
||
| TempStream << "\n\nInstruction Info:\n"; | ||
| TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" | ||
| << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects\n\n"; | ||
|
|
||
| TempStream << "[1] [2] [3] [4] [5] [6]\tInstructions:\n"; | ||
| for (unsigned I = 0, E = B.getNumInstructions(); I < E; ++I) { | ||
| const MCInst &Inst = B.getMCInstFromIndex(I); | ||
| const InstrDesc &ID = B.getInstrDesc(Inst); | ||
| unsigned NumMicroOpcodes = ID.NumMicroOps; | ||
| unsigned Latency = ID.MaxLatency; | ||
| double RThroughput = B.getRThroughput(ID); | ||
| TempStream << ' ' << NumMicroOpcodes << " "; | ||
| if (NumMicroOpcodes < 10) | ||
| TempStream << " "; | ||
| else if (NumMicroOpcodes < 100) | ||
| TempStream << ' '; | ||
| TempStream << Latency << " "; | ||
| if (Latency < 10.0) | ||
| TempStream << " "; | ||
| else if (Latency < 100.0) | ||
| TempStream << ' '; | ||
| if (RThroughput) { | ||
| TempStream << format("%.2f", RThroughput) << ' '; | ||
| if (RThroughput < 10.0) | ||
| TempStream << " "; | ||
| else if (RThroughput < 100.0) | ||
| TempStream << ' '; | ||
| } else { | ||
| TempStream << " - "; | ||
| } | ||
| TempStream << (ID.MayLoad ? " * " : " "); | ||
| TempStream << (ID.MayStore ? " * " : " "); | ||
| TempStream << (ID.HasSideEffects ? " * " : " "); | ||
| MCIP->printInst(&Inst, TempStream, "", B.getSTI()); | ||
| TempStream << '\n'; | ||
| } | ||
|
|
||
| TempStream.flush(); | ||
| File->os() << Buffer; | ||
| } | ||
|
|
||
| void BackendPrinter::printReport() const { | ||
| assert(isFileValid()); | ||
| unsigned Cycles = B.getNumCycles(); | ||
| printGeneralStatistics(B.getNumIterations(), Cycles, B.getNumInstructions(), | ||
| B.getDispatchWidth()); | ||
| if (EnableVerboseOutput) { | ||
| printDispatchStalls(B.getNumRATStalls(), B.getNumRCUStalls(), | ||
| B.getNumSQStalls(), B.getNumLDQStalls(), | ||
| B.getNumSTQStalls(), B.getNumDispatchGroupStalls()); | ||
| printRATStatistics(B.getTotalRegisterMappingsCreated(), | ||
| B.getMaxUsedRegisterMappings()); | ||
| BS->printHistograms(File->os()); | ||
|
|
||
| std::vector<BufferUsageEntry> Usage; | ||
| B.getBuffersUsage(Usage); | ||
| printSchedulerUsage(B.getSchedModel(), Usage); | ||
| } | ||
|
|
||
| if (RPV) { | ||
| RPV->printResourcePressure(getOStream(), Cycles); | ||
| printInstructionInfo(); | ||
| } | ||
|
|
||
| if (TV) { | ||
| TV->printTimeline(getOStream()); | ||
| TV->printAverageWaitTimes(getOStream()); | ||
| } | ||
| } | ||
|
|
||
| void BackendPrinter::addResourcePressureView() { | ||
| if (!RPV) { | ||
| RPV = llvm::make_unique<ResourcePressureView>( | ||
| B.getSTI(), *MCIP, B.getSourceMgr(), B.getProcResourceMasks()); | ||
| B.addEventListener(RPV.get()); | ||
| } | ||
| } | ||
|
|
||
| void BackendPrinter::addTimelineView(unsigned MaxIterations, | ||
| unsigned MaxCycles) { | ||
| if (!TV) { | ||
| TV = llvm::make_unique<TimelineView>(B.getSTI(), *MCIP, B.getSourceMgr(), | ||
| MaxIterations, MaxCycles); | ||
| B.addEventListener(TV.get()); | ||
| } | ||
| } | ||
|
|
||
| void BackendPrinter::initialize(std::string OutputFileName) { | ||
| File = getOutputStream(OutputFileName); | ||
| MCIP->setPrintImmHex(false); | ||
| if (EnableVerboseOutput) { | ||
| BS = llvm::make_unique<BackendStatistics>(); | ||
| B.addEventListener(BS.get()); | ||
| } | ||
| } | ||
|
|
||
| } // namespace mca. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| //===--------------------- BackendPrinter.h ---------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements class BackendPrinter. | ||
| /// BackendPrinter is able to collect statistics related to the code executed | ||
| /// by the Backend class. Information is then printed out with the help of | ||
| /// a MCInstPrinter (to pretty print MCInst objects) and other helper classes. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_BACKENDPRINTER_H | ||
| #define LLVM_TOOLS_LLVM_MCA_BACKENDPRINTER_H | ||
|
|
||
| #include "Backend.h" | ||
| #include "BackendStatistics.h" | ||
| #include "ResourcePressureView.h" | ||
| #include "TimelineView.h" | ||
| #include "llvm/MC/MCInstPrinter.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/FileUtilities.h" | ||
| #include "llvm/Support/ToolOutputFile.h" | ||
|
|
||
| #define DEBUG_TYPE "llvm-mca" | ||
|
|
||
| namespace mca { | ||
|
|
||
| class ResourcePressureView; | ||
| class TimelineView; | ||
|
|
||
| /// \brief A printer class that knows how to collect statistics on the | ||
| /// code analyzed by the llvm-mca tool. | ||
| /// | ||
| /// This class knows how to print out the analysis information collected | ||
| /// during the execution of the code. Internally, it delegates to other | ||
| /// classes the task of printing out timeline information as well as | ||
| /// resource pressure. | ||
| class BackendPrinter { | ||
| Backend &B; | ||
| bool EnableVerboseOutput; | ||
|
|
||
| std::unique_ptr<llvm::MCInstPrinter> MCIP; | ||
| std::unique_ptr<llvm::ToolOutputFile> File; | ||
|
|
||
| std::unique_ptr<ResourcePressureView> RPV; | ||
| std::unique_ptr<TimelineView> TV; | ||
| std::unique_ptr<BackendStatistics> BS; | ||
|
|
||
| using Histogram = std::map<unsigned, unsigned>; | ||
| void printDUStatistics(const Histogram &Stats, unsigned Cycles) const; | ||
| void printDispatchStalls(unsigned RATStalls, unsigned RCUStalls, | ||
| unsigned SQStalls, unsigned LDQStalls, | ||
| unsigned STQStalls, unsigned DGStalls) const; | ||
| void printRATStatistics(unsigned Mappings, unsigned MaxUsedMappings) const; | ||
| void printRCUStatistics(const Histogram &Histogram, unsigned Cycles) const; | ||
| void printIssuePerCycle(const Histogram &IssuePerCycle, | ||
| unsigned TotalCycles) const; | ||
| void printSchedulerUsage(const llvm::MCSchedModel &SM, | ||
| const llvm::ArrayRef<BufferUsageEntry> &Usage) const; | ||
| void printGeneralStatistics(unsigned Iterations, unsigned Cycles, | ||
| unsigned Instructions, | ||
| unsigned DispatchWidth) const; | ||
| void printInstructionInfo() const; | ||
|
|
||
| std::unique_ptr<llvm::ToolOutputFile> getOutputStream(std::string OutputFile); | ||
| void initialize(std::string OputputFileName); | ||
|
|
||
| public: | ||
| BackendPrinter(Backend &backend, std::string OutputFileName, | ||
| std::unique_ptr<llvm::MCInstPrinter> IP, bool EnableVerbose) | ||
| : B(backend), EnableVerboseOutput(EnableVerbose), MCIP(std::move(IP)) { | ||
| initialize(OutputFileName); | ||
| } | ||
|
|
||
| ~BackendPrinter() { | ||
| if (File) | ||
| File->keep(); | ||
| } | ||
|
|
||
| bool isFileValid() const { return File.get(); } | ||
| llvm::raw_ostream &getOStream() const { | ||
| assert(isFileValid()); | ||
| return File->os(); | ||
| } | ||
|
|
||
| llvm::MCInstPrinter &getMCInstPrinter() const { return *MCIP; } | ||
|
|
||
| void addResourcePressureView(); | ||
| void addTimelineView(unsigned MaxIterations = 3, unsigned MaxCycles = 80); | ||
|
|
||
| void printReport() const; | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| //===--------------------- BackendStatistics.cpp ---------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// Functionalities used by the BackendPrinter to print out histograms | ||
| /// related to number of {dispatch/issue/retire} per number of cycles. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "BackendStatistics.h" | ||
| #include "llvm/Support/Format.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| namespace mca { | ||
|
|
||
| void BackendStatistics::printRetireUnitStatistics(llvm::raw_ostream &OS) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nRetire Control Unit - " | ||
| << "number of cycles where we saw N instructions retired:\n"; | ||
| TempStream << "[# retired], [# cycles]\n"; | ||
|
|
||
| for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) { | ||
| TempStream << " " << Entry.first; | ||
| if (Entry.first < 10) | ||
| TempStream << ", "; | ||
| else | ||
| TempStream << ", "; | ||
| TempStream << Entry.second << " (" | ||
| << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) | ||
| << "%)\n"; | ||
| } | ||
|
|
||
| TempStream.flush(); | ||
| OS << Buffer; | ||
| } | ||
|
|
||
| void BackendStatistics::printDispatchUnitStatistics(llvm::raw_ostream &OS) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nDispatch Logic - " | ||
| << "number of cycles where we saw N instructions dispatched:\n"; | ||
| TempStream << "[# dispatched], [# cycles]\n"; | ||
| for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) { | ||
| TempStream << " " << Entry.first << ", " << Entry.second | ||
| << " (" | ||
| << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) | ||
| << "%)\n"; | ||
| } | ||
|
|
||
| TempStream.flush(); | ||
| OS << Buffer; | ||
| } | ||
|
|
||
| void BackendStatistics::printSchedulerStatistics(llvm::raw_ostream &OS) const { | ||
| std::string Buffer; | ||
| raw_string_ostream TempStream(Buffer); | ||
| TempStream << "\n\nSchedulers - number of cycles where we saw N instructions " | ||
| "issued:\n"; | ||
| TempStream << "[# issued], [# cycles]\n"; | ||
| for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) { | ||
| TempStream << " " << Entry.first << ", " << Entry.second << " (" | ||
| << format("%.1f", ((double)Entry.second / NumCycles) * 100) | ||
| << "%)\n"; | ||
| } | ||
|
|
||
| TempStream.flush(); | ||
| OS << Buffer; | ||
| } | ||
|
|
||
| } // namespace mca | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| //===--------------------- BackendStatistics.h ------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements a printer class for printing generic Backend | ||
| /// statistics related to the dispatch logic, scheduler and retire unit. | ||
| /// | ||
| /// Example: | ||
| /// ======== | ||
| /// | ||
| /// Dispatch Logic - number of cycles where we saw N instructions dispatched: | ||
| /// [# dispatched], [# cycles] | ||
| /// 0, 15 (11.5%) | ||
| /// 5, 4 (3.1%) | ||
| /// | ||
| /// Schedulers - number of cycles where we saw N instructions issued: | ||
| /// [# issued], [# cycles] | ||
| /// 0, 7 (5.4%) | ||
| /// 1, 4 (3.1%) | ||
| /// 2, 8 (6.2%) | ||
| /// | ||
| /// Retire Control Unit - number of cycles where we saw N instructions retired: | ||
| /// [# retired], [# cycles] | ||
| /// 0, 9 (6.9%) | ||
| /// 1, 6 (4.6%) | ||
| /// 2, 1 (0.8%) | ||
| /// 4, 3 (2.3%) | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_BACKENDSTATISTICS_H | ||
| #define LLVM_TOOLS_LLVM_MCA_BACKENDSTATISTICS_H | ||
|
|
||
| #include "HWEventListener.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
| #include <map> | ||
|
|
||
| namespace mca { | ||
|
|
||
| class BackendStatistics : public HWEventListener { | ||
| using Histogram = std::map<unsigned, unsigned>; | ||
| Histogram DispatchGroupSizePerCycle; | ||
| Histogram RetiredPerCycle; | ||
| Histogram IssuedPerCycle; | ||
|
|
||
| unsigned NumDispatched; | ||
| unsigned NumIssued; | ||
| unsigned NumRetired; | ||
| unsigned NumCycles; | ||
|
|
||
| void updateHistograms() { | ||
| DispatchGroupSizePerCycle[NumDispatched]++; | ||
| IssuedPerCycle[NumIssued]++; | ||
| RetiredPerCycle[NumRetired]++; | ||
| NumDispatched = 0; | ||
| NumIssued = 0; | ||
| NumRetired = 0; | ||
| } | ||
|
|
||
| void printRetireUnitStatistics(llvm::raw_ostream &OS) const; | ||
| void printDispatchUnitStatistics(llvm::raw_ostream &OS) const; | ||
| void printSchedulerStatistics(llvm::raw_ostream &OS) const; | ||
|
|
||
| public: | ||
| BackendStatistics() : NumDispatched(0), NumIssued(0), NumRetired(0) {} | ||
|
|
||
| void onInstructionDispatched(unsigned Index) override { NumDispatched++; } | ||
| void | ||
| onInstructionIssued(unsigned Index, | ||
| const llvm::ArrayRef<std::pair<ResourceRef, unsigned>> | ||
| & /* unused */) override { | ||
| NumIssued++; | ||
| } | ||
| void onInstructionRetired(unsigned Index) override { NumRetired++; } | ||
|
|
||
| void onCycleBegin(unsigned Cycle) override { NumCycles++; } | ||
|
|
||
| void onCycleEnd(unsigned Cycle) override { updateHistograms(); } | ||
|
|
||
| void printHistograms(llvm::raw_ostream &OS) { | ||
| printDispatchUnitStatistics(OS); | ||
| printSchedulerStatistics(OS); | ||
| printRetireUnitStatistics(OS); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
# LLVM components the llvm-mca tool is linked against.
| set(LLVM_LINK_COMPONENTS | ||
| AllTargetsAsmPrinters | ||
| AllTargetsAsmParsers | ||
| AllTargetsDescs | ||
| AllTargetsDisassemblers | ||
| AllTargetsInfos | ||
| MC | ||
| MCParser | ||
| Support | ||
| ) | ||
|
|
||
# Source files that make up the llvm-mca tool.
| add_llvm_tool(llvm-mca | ||
| Backend.cpp | ||
| BackendPrinter.cpp | ||
| BackendStatistics.cpp | ||
| Dispatch.cpp | ||
| HWEventListener.cpp | ||
| InstrBuilder.cpp | ||
| Instruction.cpp | ||
| LSUnit.cpp | ||
| llvm-mca.cpp | ||
| ResourcePressureView.cpp | ||
| Scheduler.cpp | ||
| TimelineView.cpp | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,268 @@ | ||
| //===--------------------- Dispatch.cpp -------------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements methods declared by class RegisterFile, DispatchUnit | ||
| /// and RetireControlUnit. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "Dispatch.h" | ||
| #include "Backend.h" | ||
| #include "Scheduler.h" | ||
| #include "llvm/Support/Debug.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| #define DEBUG_TYPE "llvm-mca" | ||
|
|
||
| namespace mca { | ||
|
|
||
| void RegisterFile::addRegisterMapping(WriteState &WS) { | ||
| unsigned RegID = WS.getRegisterID(); | ||
| assert(RegID && "Adding an invalid register definition?"); | ||
|
|
||
| RegisterMappings[RegID] = &WS; | ||
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) | ||
| RegisterMappings[*I] = &WS; | ||
| if (MaxUsedMappings == NumUsedMappings) | ||
| MaxUsedMappings++; | ||
| NumUsedMappings++; | ||
| TotalMappingsCreated++; | ||
| // If this is a partial update, then we are done. | ||
| if (!WS.fullyUpdatesSuperRegs()) | ||
| return; | ||
|
|
||
| for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) | ||
| RegisterMappings[*I] = &WS; | ||
| } | ||
|
|
||
| void RegisterFile::invalidateRegisterMapping(const WriteState &WS) { | ||
| unsigned RegID = WS.getRegisterID(); | ||
| bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs(); | ||
|
|
||
| assert(RegID != 0 && "Invalidating an already invalid register?"); | ||
| assert(WS.getCyclesLeft() != -512 && | ||
| "Invalidating a write of unknown cycles!"); | ||
| assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); | ||
| if (!RegisterMappings[RegID]) | ||
| return; | ||
|
|
||
| assert(NumUsedMappings); | ||
| NumUsedMappings--; | ||
|
|
||
| if (RegisterMappings[RegID] == &WS) | ||
| RegisterMappings[RegID] = nullptr; | ||
|
|
||
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) | ||
| if (RegisterMappings[*I] == &WS) | ||
| RegisterMappings[*I] = nullptr; | ||
|
|
||
| if (!ShouldInvalidateSuperRegs) | ||
| return; | ||
|
|
||
| for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) | ||
| if (RegisterMappings[*I] == &WS) | ||
| RegisterMappings[*I] = nullptr; | ||
| } | ||
|
|
||
| // Update the number of used mappings in the event of instruction retired. | ||
| // This mehod delegates to the register file the task of invalidating | ||
| // register mappings that were created for instruction IS. | ||
| void DispatchUnit::invalidateRegisterMappings(const Instruction &IS) { | ||
| for (const std::unique_ptr<WriteState> &WS : IS.getDefs()) { | ||
| DEBUG(dbgs() << "[RAT] Invalidating mapping for: "); | ||
| DEBUG(WS->dump()); | ||
| RAT->invalidateRegisterMapping(*WS.get()); | ||
| } | ||
| } | ||
|
|
||
| void RegisterFile::collectWrites(SmallVectorImpl<WriteState *> &Writes, | ||
| unsigned RegID) const { | ||
| assert(RegID && RegID < RegisterMappings.size()); | ||
| WriteState *WS = RegisterMappings[RegID]; | ||
| if (WS) { | ||
| DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n'); | ||
| Writes.push_back(WS); | ||
| } | ||
|
|
||
| // Handle potential partial register updates. | ||
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { | ||
| WS = RegisterMappings[*I]; | ||
| if (WS && std::find(Writes.begin(), Writes.end(), WS) == Writes.end()) { | ||
| DEBUG(dbgs() << "Found a dependent use of subReg " << *I << " (part of " | ||
| << RegID << ")\n"); | ||
| Writes.push_back(WS); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| bool RegisterFile::isAvailable(unsigned NumRegWrites) { | ||
| if (!TotalMappings) | ||
| return true; | ||
| if (NumRegWrites > TotalMappings) { | ||
| // The user specified a too small number of registers. | ||
| // Artificially set the number of temporaries to NumRegWrites. | ||
| errs() << "warning: not enough temporaries in the register file. " | ||
| << "The register file size has been automatically increased to " | ||
| << NumRegWrites << '\n'; | ||
| TotalMappings = NumRegWrites; | ||
| } | ||
|
|
||
| return NumRegWrites + NumUsedMappings <= TotalMappings; | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void RegisterFile::dump() const { | ||
| for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) | ||
| if (RegisterMappings[I]) { | ||
| dbgs() << MRI.getName(I) << ", " << I << ", "; | ||
| RegisterMappings[I]->dump(); | ||
| } | ||
|
|
||
| dbgs() << "TotalMappingsCreated: " << TotalMappingsCreated | ||
| << ", MaxUsedMappings: " << MaxUsedMappings | ||
| << ", NumUsedMappings: " << NumUsedMappings << '\n'; | ||
| } | ||
| #endif | ||
|
|
||
| // Reserves a number of slots, and returns a new token. | ||
| unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) { | ||
| assert(isAvailable(NumMicroOps)); | ||
| unsigned NormalizedQuantity = | ||
| std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); | ||
| // Zero latency instructions may have zero mOps. Artificially bump this | ||
| // value to 1. Although zero latency instructions don't consume scheduler | ||
| // resources, they still consume one slot in the retire queue. | ||
| NormalizedQuantity = std::max(NormalizedQuantity, 1U); | ||
| unsigned TokenID = NextAvailableSlotIdx; | ||
| Queue[NextAvailableSlotIdx] = {Index, NormalizedQuantity, false}; | ||
| NextAvailableSlotIdx += NormalizedQuantity; | ||
| NextAvailableSlotIdx %= Queue.size(); | ||
| AvailableSlots -= NormalizedQuantity; | ||
| return TokenID; | ||
| } | ||
|
|
||
| void DispatchUnit::notifyInstructionDispatched(unsigned Index) { | ||
| Owner->notifyInstructionDispatched(Index); | ||
| } | ||
|
|
||
| void DispatchUnit::notifyInstructionRetired(unsigned Index) { | ||
| Owner->notifyInstructionRetired(Index); | ||
| } | ||
|
|
||
| void RetireControlUnit::cycleEvent() { | ||
| if (isEmpty()) | ||
| return; | ||
|
|
||
| unsigned NumRetired = 0; | ||
| while (!isEmpty()) { | ||
| if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) | ||
| break; | ||
| RUToken &Current = Queue[CurrentInstructionSlotIdx]; | ||
| assert(Current.NumSlots && "Reserved zero slots?"); | ||
| if (!Current.Executed) | ||
| break; | ||
| Owner->notifyInstructionRetired(Current.Index); | ||
| CurrentInstructionSlotIdx += Current.NumSlots; | ||
| CurrentInstructionSlotIdx %= Queue.size(); | ||
| AvailableSlots += Current.NumSlots; | ||
| NumRetired++; | ||
| } | ||
| } | ||
|
|
||
| void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { | ||
| assert(Queue.size() > TokenID); | ||
| assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U); | ||
| Queue[TokenID].Executed = true; | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void RetireControlUnit::dump() const { | ||
| dbgs() << "Retire Unit: { Total Slots=" << Queue.size() | ||
| << ", Available Slots=" << AvailableSlots << " }\n"; | ||
| } | ||
| #endif | ||
|
|
||
| bool DispatchUnit::checkRAT(const InstrDesc &Desc) { | ||
| unsigned NumWrites = Desc.Writes.size(); | ||
| if (RAT->isAvailable(NumWrites)) | ||
| return true; | ||
| DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; | ||
| return false; | ||
| } | ||
|
|
||
| bool DispatchUnit::checkRCU(const InstrDesc &Desc) { | ||
| unsigned NumMicroOps = Desc.NumMicroOps; | ||
| if (RCU->isAvailable(NumMicroOps)) | ||
| return true; | ||
| DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]++; | ||
| return false; | ||
| } | ||
|
|
||
| bool DispatchUnit::checkScheduler(const InstrDesc &Desc) { | ||
| // If this is a zero-latency instruction, then it bypasses | ||
| // the scheduler. | ||
| switch (SC->canBeDispatched(Desc)) { | ||
| case Scheduler::HWS_AVAILABLE: | ||
| return true; | ||
| case Scheduler::HWS_QUEUE_UNAVAILABLE: | ||
| DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]++; | ||
| break; | ||
| case Scheduler::HWS_LD_QUEUE_UNAVAILABLE: | ||
| DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]++; | ||
| break; | ||
| case Scheduler::HWS_ST_QUEUE_UNAVAILABLE: | ||
| DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]++; | ||
| break; | ||
| case Scheduler::HWS_DISPATCH_GROUP_RESTRICTION: | ||
| DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]++; | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
| unsigned DispatchUnit::dispatch(unsigned IID, Instruction *NewInst) { | ||
| assert(!CarryOver && "Cannot dispatch another instruction!"); | ||
| unsigned NumMicroOps = NewInst->getDesc().NumMicroOps; | ||
| if (NumMicroOps > DispatchWidth) { | ||
| assert(AvailableEntries == DispatchWidth); | ||
| AvailableEntries = 0; | ||
| CarryOver = NumMicroOps - DispatchWidth; | ||
| } else { | ||
| assert(AvailableEntries >= NumMicroOps); | ||
| AvailableEntries -= NumMicroOps; | ||
| } | ||
|
|
||
| // Reserve slots in the RCU. | ||
| unsigned RCUTokenID = RCU->reserveSlot(IID, NumMicroOps); | ||
| Owner->notifyInstructionDispatched(IID); | ||
|
|
||
| SC->scheduleInstruction(IID, NewInst); | ||
| return RCUTokenID; | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void DispatchUnit::dump() const { | ||
| RAT->dump(); | ||
| RCU->dump(); | ||
|
|
||
| unsigned DSRAT = DispatchStalls[DS_RAT_REG_UNAVAILABLE]; | ||
| unsigned DSRCU = DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; | ||
| unsigned DSSCHEDQ = DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; | ||
| unsigned DSLQ = DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; | ||
| unsigned DSSQ = DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; | ||
|
|
||
| dbgs() << "STALLS --- RAT: " << DSRAT << ", RCU: " << DSRCU | ||
| << ", SCHED_QUEUE: " << DSSCHEDQ << ", LOAD_QUEUE: " << DSLQ | ||
| << ", STORE_QUEUE: " << DSSQ << '\n'; | ||
| } | ||
| #endif | ||
|
|
||
| } // namespace mca |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,319 @@ | ||
| //===----------------------- Dispatch.h -------------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file implements classes that are used to model register files, | ||
| /// reorder buffers and the hardware dispatch logic. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H | ||
| #define LLVM_TOOLS_LLVM_MCA_DISPATCH_H | ||
|
|
||
#include "Instruction.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <algorithm>
#include <map>
|
|
||
| namespace mca { | ||
|
|
||
| class WriteState; | ||
| class DispatchUnit; | ||
| class Scheduler; | ||
| class Backend; | ||
|
|
||
| /// \brief Keeps track of register definitions. | ||
| /// | ||
| /// This class tracks register definitions, and performs register renaming | ||
| /// to break anti dependencies. | ||
| /// By default, there is no limit in the number of register aliases which | ||
| /// can be created for the purpose of register renaming. However, users can | ||
| /// specify at object construction time a limit in the number of temporary | ||
| /// registers which can be used by the register renaming logic. | ||
| class RegisterFile { | ||
| const llvm::MCRegisterInfo &MRI; | ||
| // Currently used mappings and maximum used mappings. | ||
| // These are to generate statistics only. | ||
| unsigned NumUsedMappings; | ||
| unsigned MaxUsedMappings; | ||
| // Total number of mappings created over time. | ||
| unsigned TotalMappingsCreated; | ||
|
|
||
| // The maximum number of register aliases which can be used by the | ||
| // register renamer. Defaut value for this field is zero. | ||
| // A value of zero for this field means that there is no limit in the | ||
| // amount of register mappings which can be created. That is equivalent | ||
| // to having a theoretically infinite number of temporary registers. | ||
| unsigned TotalMappings; | ||
|
|
||
| // This map contains an entry for every physical register. | ||
| // A register index is used as a key value to access a WriteState. | ||
| // This is how we track RAW dependencies for dispatched | ||
| // instructions. For every register, we track the last seen write only. | ||
| // This assumes that all writes fully update both super and sub registers. | ||
| // We need a flag in MCInstrDesc to check if a write also updates super | ||
| // registers. We can then have a extra tablegen flag to set for instructions. | ||
| // This is a separate patch on its own. | ||
| std::vector<WriteState *> RegisterMappings; | ||
| // Assumptions are: | ||
| // a) a false dependencies is always removed by the register renamer. | ||
| // b) the register renamer can create an "infinite" number of mappings. | ||
| // Since we track the number of mappings created, in future we may | ||
| // introduce constraints on the number of mappings that can be created. | ||
| // For example, the maximum number of registers that are available for | ||
| // register renaming purposes may default to the size of the register file. | ||
|
|
||
| // In future, we can extend this design to allow multiple register files, and | ||
| // apply different restrictions on the register mappings and the number of | ||
| // temporary registers used by mappings. | ||
|
|
||
| public: | ||
| RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0) | ||
| : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0), | ||
| TotalMappingsCreated(0), TotalMappings(Mappings), | ||
| RegisterMappings(MRI.getNumRegs(), nullptr) {} | ||
|
|
||
| // Creates a new register mapping for RegID. | ||
| // This reserves a temporary register in the register file. | ||
| void addRegisterMapping(WriteState &WS); | ||
|
|
||
| // Invalidates register mappings associated to the input WriteState object. | ||
| // This releases temporary registers in the register file. | ||
| void invalidateRegisterMapping(const WriteState &WS); | ||
|
|
||
| bool isAvailable(unsigned NumRegWrites); | ||
| void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes, | ||
| unsigned RegID) const; | ||
| void updateOnRead(ReadState &RS, unsigned RegID); | ||
| unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; } | ||
| unsigned getTotalRegisterMappingsCreated() const { | ||
| return TotalMappingsCreated; | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void dump() const; | ||
| #endif | ||
| }; | ||
|
|
||
| /// \brief tracks which instructions are in-flight (i.e. dispatched but not | ||
| /// retired) in the OoO backend. | ||
| /// | ||
| /// This class checks on every cycle if/which instructions can be retired. | ||
| /// Instructions are retired in program order. | ||
| /// In the event of instruction retired, the DispatchUnit object that owns | ||
| /// this RetireControlUnit gets notified. | ||
| /// On instruction retired, register updates are all architecturally | ||
| /// committed, and any temporary registers originally allocated for the | ||
| /// retired instruction are freed. | ||
| struct RetireControlUnit { | ||
| // A "token" (object of class RUToken) is created by the retire unit for every | ||
| // instruction dispatched to the schedulers. Flag 'Executed' is used to | ||
| // quickly check if an instruction has reached the write-back stage. A token | ||
| // also carries information related to the number of entries consumed by the | ||
| // instruction in the reorder buffer. The idea is that those entries will | ||
| // become available again once the instruction is retired. On every cycle, | ||
| // the RCU (Retire Control Unit) scans every token starting to search for | ||
| // instructions that are ready to retire. retired. Instructions are retired | ||
| // in program order. Only 'Executed' instructions are eligible for retire. | ||
| // Note that the size of the reorder buffer is defined by the scheduling model | ||
| // via field 'NumMicroOpBufferSize'. | ||
| struct RUToken { | ||
| unsigned Index; // Instruction index. | ||
| unsigned NumSlots; // Slots reserved to this instruction. | ||
| bool Executed; // True if the instruction is past the WB stage. | ||
| }; | ||
|
|
||
| private: | ||
| unsigned NextAvailableSlotIdx; | ||
| unsigned CurrentInstructionSlotIdx; | ||
| unsigned AvailableSlots; | ||
| unsigned MaxRetirePerCycle; // 0 means no limit. | ||
| std::vector<RUToken> Queue; | ||
| DispatchUnit *Owner; | ||
|
|
||
| public: | ||
| RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU) | ||
| : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), | ||
| AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) { | ||
| assert(NumSlots && "Expected at least one slot!"); | ||
| Queue.resize(NumSlots); | ||
| } | ||
|
|
||
| bool isFull() const { return !AvailableSlots; } | ||
| bool isEmpty() const { return AvailableSlots == Queue.size(); } | ||
| bool isAvailable(unsigned Quantity = 1) const { | ||
| // Some instructions may declare a number of uOps which exceedes the size | ||
| // of the reorder buffer. To avoid problems, cap the amount of slots to | ||
| // the size of the reorder buffer. | ||
| Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size())); | ||
| return AvailableSlots >= Quantity; | ||
| } | ||
|
|
||
| // Reserves a number of slots, and returns a new token. | ||
| unsigned reserveSlot(unsigned Index, unsigned NumMicroOps); | ||
|
|
||
| /// Retires instructions in program order. | ||
| void cycleEvent(); | ||
|
|
||
| void onInstructionExecuted(unsigned TokenID); | ||
|
|
||
| #ifndef NDEBUG | ||
| void dump() const; | ||
| #endif | ||
| }; | ||
|
|
||
| // \brief Implements the hardware dispatch logic. | ||
| // | ||
| // This class is responsible for the dispatch stage, in which instructions are | ||
| // dispatched in groups to the Scheduler. An instruction can be dispatched if | ||
| // functional units are available. | ||
| // To be more specific, an instruction can be dispatched to the Scheduler if: | ||
| // 1) There are enough entries in the reorder buffer (implemented by class | ||
| // RetireControlUnit) to accomodate all opcodes. | ||
| // 2) There are enough temporaries to rename output register operands. | ||
| // 3) There are enough entries available in the used buffered resource(s). | ||
| // | ||
| // The number of micro opcodes that can be dispatched in one cycle is limited by | ||
| // the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when | ||
| // processor resources are not available (i.e. at least one of the | ||
| // abovementioned checks fails). Dispatch stall events are counted during the | ||
| // entire execution of the code, and displayed by the performance report when | ||
| // flag '-verbose' is specified. | ||
| // | ||
| // If the number of micro opcodes of an instruction is bigger than | ||
| // DispatchWidth, then it can only be dispatched at the beginning of one cycle. | ||
| // The DispatchUnit will still have to wait for a number of cycles (depending on | ||
| // the DispatchWidth and the number of micro opcodes) before it can serve other | ||
| // instructions. | ||
| class DispatchUnit { | ||
| unsigned DispatchWidth; | ||
| unsigned AvailableEntries; | ||
| unsigned CarryOver; | ||
| Scheduler *SC; | ||
|
|
||
| std::unique_ptr<RegisterFile> RAT; | ||
| std::unique_ptr<RetireControlUnit> RCU; | ||
| Backend *Owner; | ||
|
|
||
| /// Dispatch stall event identifiers. | ||
| /// | ||
| /// The naming convention is: | ||
| /// * Event names starts with the "DS_" prefix | ||
| /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the | ||
| /// the unavailable resource/functional unit acronym (example: RAT) | ||
| /// * The last substring is the event reason (example: REG_UNAVAILABLE means | ||
| /// that register renaming couldn't find enough spare registers in the | ||
| /// register file). | ||
| /// | ||
| /// List of acronyms used for processor resoures: | ||
| /// RAT - Register Alias Table (used by the register renaming logic) | ||
| /// RCU - Retire Control Unit | ||
| /// SQ - Scheduler's Queue | ||
| /// LDQ - Load Queue | ||
| /// STQ - Store Queue | ||
| enum { | ||
| DS_RAT_REG_UNAVAILABLE, | ||
| DS_RCU_TOKEN_UNAVAILABLE, | ||
| DS_SQ_TOKEN_UNAVAILABLE, | ||
| DS_LDQ_TOKEN_UNAVAILABLE, | ||
| DS_STQ_TOKEN_UNAVAILABLE, | ||
| DS_DISPATCH_GROUP_RESTRICTION, | ||
| DS_LAST | ||
| }; | ||
|
|
||
| // The DispatchUnit track dispatch stall events caused by unavailable | ||
| // of hardware resources. Events are classified based on the stall kind; | ||
| // so we have a counter for every source of dispatch stall. Counters are | ||
| // stored into a vector `DispatchStall` which is always of size DS_LAST. | ||
| std::vector<unsigned> DispatchStalls; | ||
|
|
||
| bool checkRAT(const InstrDesc &Desc); | ||
| bool checkRCU(const InstrDesc &Desc); | ||
| bool checkScheduler(const InstrDesc &Desc); | ||
|
|
||
| void notifyInstructionDispatched(unsigned IID); | ||
|
|
||
| public: | ||
| DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI, | ||
| unsigned MicroOpBufferSize, unsigned RegisterFileSize, | ||
| unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth, | ||
| Scheduler *Sched) | ||
| : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), | ||
| CarryOver(0U), SC(Sched), | ||
| RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)), | ||
| RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize, | ||
| MaxRetirePerCycle, this)), | ||
| Owner(B), DispatchStalls(DS_LAST, 0) {} | ||
|
|
||
| unsigned getDispatchWidth() const { return DispatchWidth; } | ||
|
|
||
| bool isAvailable(unsigned NumEntries) const { | ||
| return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth; | ||
| } | ||
|
|
||
| bool isRCUEmpty() const { return RCU->isEmpty(); } | ||
|
|
||
| bool canDispatch(const InstrDesc &Desc) { | ||
| assert(isAvailable(Desc.NumMicroOps)); | ||
| return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc); | ||
| } | ||
|
|
||
| unsigned dispatch(unsigned IID, Instruction *NewInst); | ||
|
|
||
| void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec, | ||
| unsigned RegID) const { | ||
| return RAT->collectWrites(Vec, RegID); | ||
| } | ||
| unsigned getNumRATStalls() const { | ||
| return DispatchStalls[DS_RAT_REG_UNAVAILABLE]; | ||
| } | ||
| unsigned getNumRCUStalls() const { | ||
| return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; | ||
| } | ||
| unsigned getNumSQStalls() const { | ||
| return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; | ||
| } | ||
| unsigned getNumLDQStalls() const { | ||
| return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; | ||
| } | ||
| unsigned getNumSTQStalls() const { | ||
| return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; | ||
| } | ||
| unsigned getNumDispatchGroupStalls() const { | ||
| return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; | ||
| } | ||
| unsigned getMaxUsedRegisterMappings() const { | ||
| return RAT->getMaxUsedRegisterMappings(); | ||
| } | ||
| unsigned getTotalRegisterMappingsCreated() const { | ||
| return RAT->getTotalRegisterMappingsCreated(); | ||
| } | ||
| void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); } | ||
|
|
||
| void cycleEvent(unsigned Cycle) { | ||
| RCU->cycleEvent(); | ||
| AvailableEntries = | ||
| CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; | ||
| CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; | ||
| } | ||
|
|
||
| void notifyInstructionRetired(unsigned Index); | ||
|
|
||
| void onInstructionExecuted(unsigned TokenID) { | ||
| RCU->onInstructionExecuted(TokenID); | ||
| } | ||
|
|
||
| void invalidateRegisterMappings(const Instruction &Inst); | ||
| #ifndef NDEBUG | ||
| void dump() const; | ||
| #endif | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| //===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file defines a vtable anchor for struct HWEventListener. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "HWEventListener.h" | ||
|
|
||
| namespace mca { | ||
|
|
||
| // Anchor the vtable here. | ||
| void HWEventListener::anchor() {} | ||
|
|
||
| } // namespace mca |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
|
|
||
| //===----------------------- HWEventListener.h ------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file defines the main interface for hardware event listeners. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H | ||
| #define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H | ||
|
|
||
| #include "llvm/ADT/ArrayRef.h" | ||
| #include <utility> | ||
|
|
||
| namespace mca { | ||
|
|
||
| struct HWEventListener { | ||
| // Events generated by the Retire Control Unit. | ||
| virtual void onInstructionRetired(unsigned Index) {}; | ||
|
|
||
| // Events generated by the Scheduler. | ||
| using ResourceRef = std::pair<uint64_t, uint64_t>; | ||
| virtual void | ||
| onInstructionIssued(unsigned Index, | ||
| const llvm::ArrayRef<std::pair<ResourceRef, unsigned>> &Used) {} | ||
| virtual void onInstructionExecuted(unsigned Index) {} | ||
| virtual void onInstructionReady(unsigned Index) {} | ||
| virtual void onResourceAvailable(const ResourceRef &RRef) {}; | ||
|
|
||
| // Events generated by the Dispatch logic. | ||
| virtual void onInstructionDispatched(unsigned Index) {} | ||
|
|
||
| // Generic events generated by the Backend. | ||
| virtual void onCycleBegin(unsigned Cycle) {} | ||
| virtual void onCycleEnd(unsigned Cycle) {} | ||
|
|
||
| virtual ~HWEventListener() = default; | ||
| virtual void anchor(); | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| //===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// A builder class for instructions that are statically analyzed by llvm-mca. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H | ||
| #define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H | ||
|
|
||
| #include "Dispatch.h" | ||
| #include "Instruction.h" | ||
| #include "llvm/MC/MCInstrInfo.h" | ||
| #include "llvm/MC/MCSubtargetInfo.h" | ||
|
|
||
| namespace mca { | ||
|
|
||
| class DispatchUnit; | ||
|
|
||
| /// \brief A builder class that knows how to construct Instruction objects. | ||
| /// | ||
| /// Every llvm-mca Instruction is described by an object of class InstrDesc. | ||
| /// An InstrDesc describes which registers are read/written by the instruction, | ||
| /// as well as the instruction latency and hardware resources consumed. | ||
| /// | ||
| /// This class is used by the tool to construct Instructions and instruction | ||
| /// descriptors (i.e. InstrDesc objects). | ||
| /// Information from the machine scheduling model is used to identify processor | ||
| /// resources that are consumed by an instruction. | ||
| class InstrBuilder { | ||
| const llvm::MCInstrInfo &MCII; | ||
| const llvm::ArrayRef<uint64_t> ProcResourceMasks; | ||
|
|
||
| llvm::DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors; | ||
| llvm::DenseMap<unsigned, std::unique_ptr<Instruction>> Instructions; | ||
|
|
||
| void createInstrDescImpl(const llvm::MCSubtargetInfo &STI, | ||
| const llvm::MCInst &MCI); | ||
|
|
||
| public: | ||
| InstrBuilder(const llvm::MCInstrInfo &mcii, | ||
| const llvm::ArrayRef<uint64_t> Masks) | ||
| : MCII(mcii), ProcResourceMasks(Masks) {} | ||
|
|
||
| const InstrDesc &getOrCreateInstrDesc(const llvm::MCSubtargetInfo &STI, | ||
| const llvm::MCInst &MCI); | ||
|
|
||
| Instruction *createInstruction(const llvm::MCSubtargetInfo &STI, | ||
| DispatchUnit &DU, unsigned Idx, | ||
| const llvm::MCInst &MCI); | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| //===--------------------- Instruction.cpp ----------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file defines abstractions used by the Backend to model register reads, | ||
| // register writes and instructions. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "Instruction.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
|
|
||
| namespace mca { | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| void ReadState::writeStartEvent(unsigned Cycles) { | ||
| assert(DependentWrites); | ||
| assert(CyclesLeft == UNKNOWN_CYCLES); | ||
|
|
||
| // This read may be dependent on more than one write. This typically occurs | ||
| // when a definition is the result of multiple writes where at least one | ||
| // write does a partial register update. | ||
| // The HW is forced to do some extra bookkeeping to track of all the | ||
| // dependent writes, and implement a merging scheme for the partial writes. | ||
| --DependentWrites; | ||
| TotalCycles = std::max(TotalCycles, Cycles); | ||
|
|
||
| if (!DependentWrites) | ||
| CyclesLeft = TotalCycles; | ||
| } | ||
|
|
||
| void WriteState::onInstructionIssued() { | ||
| assert(CyclesLeft == UNKNOWN_CYCLES); | ||
| // Update the number of cycles left based on the WriteDescriptor info. | ||
| CyclesLeft = WD.Latency; | ||
|
|
||
| // Now that the time left before write-back is know, notify | ||
| // all the users. | ||
| for (const std::pair<ReadState *, int> &User : Users) { | ||
| ReadState *RS = User.first; | ||
| unsigned ReadCycles = std::max(0, CyclesLeft - User.second); | ||
| RS->writeStartEvent(ReadCycles); | ||
| } | ||
| } | ||
|
|
||
| void WriteState::addUser(ReadState *User, int ReadAdvance) { | ||
| // If CyclesLeft is different than -1, then we don't need to | ||
| // update the list of users. We can just notify the user with | ||
| // the actual number of cycles left (which may be zero). | ||
| if (CyclesLeft != UNKNOWN_CYCLES) { | ||
| unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); | ||
| User->writeStartEvent(ReadCycles); | ||
| return; | ||
| } | ||
|
|
||
| std::pair<ReadState *, int> NewPair(User, ReadAdvance); | ||
| Users.insert(NewPair); | ||
| } | ||
|
|
||
| void WriteState::cycleEvent() { | ||
| // Note: CyclesLeft can be a negative number. It is an error to | ||
| // make it an unsigned quantity because users of this write may | ||
| // specify a negative ReadAdvance. | ||
| if (CyclesLeft != UNKNOWN_CYCLES) | ||
| CyclesLeft--; | ||
| } | ||
|
|
||
| void ReadState::cycleEvent() { | ||
| // If CyclesLeft is unknown, then bail out immediately. | ||
| if (CyclesLeft == UNKNOWN_CYCLES) | ||
| return; | ||
|
|
||
| // If there are still dependent writes, or we reached cycle zero, | ||
| // then just exit. | ||
| if (DependentWrites || CyclesLeft == 0) | ||
| return; | ||
|
|
||
| CyclesLeft--; | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void WriteState::dump() const { | ||
| dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << WD.Latency << ", RegID " | ||
| << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }\n"; | ||
| } | ||
| #endif | ||
|
|
||
| bool Instruction::isReady() { | ||
| if (Stage == IS_READY) | ||
| return true; | ||
|
|
||
| assert(Stage == IS_AVAILABLE); | ||
| for (const UniqueUse &Use : Uses) | ||
| if (!Use.get()->isReady()) | ||
| return false; | ||
|
|
||
| setReady(); | ||
| return true; | ||
| } | ||
|
|
||
| void Instruction::execute() { | ||
| assert(Stage == IS_READY); | ||
| Stage = IS_EXECUTING; | ||
| for (UniqueDef &Def : Defs) | ||
| Def->onInstructionIssued(); | ||
| } | ||
|
|
||
| bool Instruction::isZeroLatency() const { | ||
| return Desc.MaxLatency == 0 && Defs.size() == 0 && Uses.size() == 0; | ||
| } | ||
|
|
||
| void Instruction::cycleEvent() { | ||
| if (isDispatched()) { | ||
| for (UniqueUse &Use : Uses) | ||
| Use->cycleEvent(); | ||
| return; | ||
| } | ||
| if (isExecuting()) { | ||
| for (UniqueDef &Def : Defs) | ||
| Def->cycleEvent(); | ||
| CyclesLeft--; | ||
| } | ||
| if (!CyclesLeft) | ||
| Stage = IS_EXECUTED; | ||
| } | ||
|
|
||
| } // namespace mca |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,336 @@ | ||
| //===--------------------- Instruction.h ------------------------*- C++ -*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// This file defines abstractions used by the Backend to model register reads, | ||
| /// register writes and instructions. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H | ||
| #define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H | ||
|
|
||
| #include "llvm/Support/MathExtras.h" | ||
| #include <memory> | ||
| #include <set> | ||
| #include <vector> | ||
|
|
||
| namespace mca { | ||
|
|
||
| struct WriteDescriptor; | ||
| struct ReadDescriptor; | ||
| class WriteState; | ||
| class ReadState; | ||
|
|
||
| constexpr int UNKNOWN_CYCLES = -512; | ||
|
|
||
/// \brief Static description of a single register write of an instruction.
struct WriteDescriptor {
  /// Index of the operand being written, or -1 for an implicit write.
  int OpIndex;

  /// Latency of the write: the number of cycles before the write-back stage.
  int Latency;

  /// Register defined by the write. Only non-zero for implicit definitions.
  unsigned RegisterID;

  /// Flag describing how this write affects super-registers.
  /// On X86, byte/word writes on GPR registers are partial updates. A write of
  /// an XMM register done by a legacy SSE instruction likewise updates the
  /// corresponding YMM super-register only partially.
  /// NOTE(review): the field name says "fully updates", whereas the previous
  /// comment described the flag as true for partial updates - confirm the
  /// polarity against the code that sets it.
  bool FullyUpdatesSuperRegs;

  /// With instruction itineraries this holds the SchedClass ID; otherwise it
  /// defaults to the WriteResourceID of the MCWriteLatencyEntry associated
  /// with this write.
  /// The value is matched against the "ReadAdvance" information when read
  /// latencies are computed: the hardware backend may implement dedicated
  /// forwarding paths that quickly propagate write results to dependent
  /// instructions waiting in the reservation station, effectively bypassing
  /// the write-back stage.
  unsigned SClassOrWriteResourceID;

  /// True only if the write comes from an optional definition. Optional
  /// definitions are allowed to reference register ID zero ("no register").
  bool IsOptionalDef;
};
|
|
||
/// \brief Static description of a single register read of an instruction.
struct ReadDescriptor {
  /// Index of the operand being read; defaults to -1 for an implicit read.
  int OpIndex;

  /// Register being read. Only set for implicit reads.
  unsigned RegisterID;

  /// Scheduling class index, used to look up the MCSchedClassDesc object in
  /// the scheduling model.
  unsigned SchedClassID;

  /// True if the hardware may have local forwarding logic to serve a write
  /// used by this read. Together with SchedClassID, this is checked at
  /// Instruction creation time to decide whether the input operands can
  /// benefit from a ReadAdvance bonus.
  bool HasReadAdvanceEntries;
};
|
|
||
| /// \brief Tracks uses of a register definition (e.g. register write). | ||
| /// | ||
| /// Each implicit/explicit register write is associated with an instance of | ||
| /// this class. A WriteState object tracks the dependent users of a | ||
| /// register write. It also tracks how many cycles are left before the write | ||
| /// back stage. | ||
| class WriteState { | ||
| const WriteDescriptor &WD; | ||
| // On instruction issue, this field is set equal to the write latency. | ||
| // Before instruction issue, this field defaults to -512, a special | ||
| // value that represents an "unknown" number of cycles. | ||
| int CyclesLeft; | ||
|
|
||
| // Actual register defined by this write. This field is only used | ||
| // to speedup queries on the register file. | ||
| // For implicit writes, this field always matches the value of | ||
| // field RegisterID from WD. | ||
| unsigned RegisterID; | ||
|
|
||
| // A list of dependent reads. Users is a set of dependent | ||
| // reads. A dependent read is added to the set only if CyclesLeft | ||
| // is "unknown". As soon as CyclesLeft is 'known', each user in the set | ||
| // gets notified with the actual CyclesLeft. | ||
|
|
||
| // The 'second' element of a pair is a "ReadAdvance" number of cycles. | ||
| std::set<std::pair<ReadState *, int>> Users; | ||
|
|
||
| public: | ||
| WriteState(const WriteDescriptor &Desc) | ||
| : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(Desc.RegisterID) {} | ||
| WriteState(const WriteState &Other) = delete; | ||
| WriteState &operator=(const WriteState &Other) = delete; | ||
|
|
||
| int getCyclesLeft() const { return CyclesLeft; } | ||
| unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; } | ||
| unsigned getRegisterID() const { return RegisterID; } | ||
| void setRegisterID(unsigned ID) { RegisterID = ID; } | ||
|
|
||
| void addUser(ReadState *Use, int ReadAdvance); | ||
| bool fullyUpdatesSuperRegs() const { return WD.FullyUpdatesSuperRegs; } | ||
| bool isWrittenBack() const { return CyclesLeft == 0; } | ||
|
|
||
| // On every cycle, update CyclesLeft and notify dependent users. | ||
| void cycleEvent(); | ||
| void onInstructionIssued(); | ||
|
|
||
| #ifndef NDEBUG | ||
| void dump() const; | ||
| #endif | ||
| }; | ||
|
|
||
| /// \brief Tracks register operand latency in cycles. | ||
| /// | ||
| /// A read may be dependent on more than one write. This occurs when some | ||
| /// writes only partially update the register associated to this read. | ||
| class ReadState { | ||
| const ReadDescriptor &RD; | ||
| unsigned DependentWrites; | ||
| int CyclesLeft; | ||
| unsigned TotalCycles; | ||
|
|
||
| public: | ||
| bool isReady() const { | ||
| if (DependentWrites) | ||
| return false; | ||
| return (CyclesLeft == UNKNOWN_CYCLES || CyclesLeft == 0); | ||
| } | ||
|
|
||
| ReadState(const ReadDescriptor &Desc) | ||
| : RD(Desc), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), | ||
| TotalCycles(0) {} | ||
| ReadState(const ReadState &Other) = delete; | ||
| ReadState &operator=(const ReadState &Other) = delete; | ||
|
|
||
| const ReadDescriptor &getDescriptor() const { return RD; } | ||
| unsigned getSchedClass() const { return RD.SchedClassID; } | ||
| void cycleEvent(); | ||
| void writeStartEvent(unsigned Cycles); | ||
| void setDependentWrites(unsigned Writes) { DependentWrites = Writes; } | ||
| }; | ||
|
|
||
/// \brief A sequence of cycles.
///
/// This class can be used as a building block to construct ranges of cycles.
/// A segment is the half-open interval [Begin, End).
class CycleSegment {
  unsigned Begin; // Inclusive.
  unsigned End;   // Exclusive.
  bool Reserved;  // Resources associated to this segment must be reserved.

public:
  CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false)
      : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {}

  /// True if \p Cycle lies within [Begin, End).
  bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; }

  /// True if this segment begins at or after the (exclusive) end of \p CS.
  /// The previous implementation had the bodies of startsAfter() and
  /// endsBefore() swapped; overlaps() is unaffected because it uses both.
  bool startsAfter(const CycleSegment &CS) const { return Begin >= CS.End; }

  /// True if this segment ends at or before the beginning of \p CS.
  bool endsBefore(const CycleSegment &CS) const { return End <= CS.Begin; }

  /// True if this segment neither starts after nor ends before \p CS.
  bool overlaps(const CycleSegment &CS) const {
    return !startsAfter(CS) && !endsBefore(CS);
  }

  /// True if the segment has reached cycle zero but has not been fully
  /// consumed yet.
  bool isExecuting() const { return Begin == 0 && End != 0; }

  /// True if the segment has been fully consumed (End reached zero).
  bool isExecuted() const { return End == 0; }

  /// Orders segments by their first cycle only.
  bool operator<(const CycleSegment &Other) const {
    return Begin < Other.Begin;
  }

  /// Moves both bounds one cycle closer to zero, saturating at zero.
  CycleSegment &operator--() {
    if (Begin)
      Begin--;
    if (End)
      End--;
    return *this;
  }

  bool isValid() const { return Begin <= End; }

  /// Number of cycles in the segment. Only meaningful if isValid().
  unsigned size() const { return End - Begin; }

  /// Shortens the segment by \p Cycles by lowering End. \p Cycles must not
  /// exceed End.
  void Subtract(unsigned Cycles) {
    assert(End >= Cycles);
    End -= Cycles;
  }

  unsigned begin() const { return Begin; }
  unsigned end() const { return End; }
  void setEnd(unsigned NewEnd) { End = NewEnd; }
  bool isReserved() const { return Reserved; }
  void setReserved() { Reserved = true; }
};
|
|
||
| /// \brief Helper used by class InstrDesc to describe how hardware resources | ||
| /// are used. | ||
| /// | ||
| /// This class describes how many resource units of a specific resource kind | ||
| /// (and how many cycles) are "used" by an instruction. | ||
| struct ResourceUsage { | ||
| CycleSegment CS; | ||
| unsigned NumUnits; | ||
| ResourceUsage(CycleSegment Cycles, unsigned Units = 1) | ||
| : CS(Cycles), NumUnits(Units) {} | ||
| unsigned size() const { return CS.size(); } | ||
| bool isReserved() const { return CS.isReserved(); } | ||
| void setReserved() { CS.setReserved(); } | ||
| }; | ||
|
|
||
| /// \brief An instruction descriptor | ||
| struct InstrDesc { | ||
| std::vector<WriteDescriptor> Writes; // Implicit writes are at the end. | ||
| std::vector<ReadDescriptor> Reads; // Implicit reads are at the end. | ||
|
|
||
| // For every resource used by an instruction of this kind, this vector | ||
| // reports the number of "consumed cycles". | ||
| std::vector<std::pair<uint64_t, ResourceUsage>> Resources; | ||
|
|
||
| // A list of buffered resources consumed by this instruction. | ||
| std::vector<uint64_t> Buffers; | ||
| unsigned MaxLatency; | ||
| // Number of MicroOps for this instruction. | ||
| unsigned NumMicroOps; | ||
|
|
||
| bool MayLoad; | ||
| bool MayStore; | ||
| bool HasSideEffects; | ||
| }; | ||
|
|
||
| /// An instruction dispatched to the out-of-order backend. | ||
| /// | ||
| /// This class is used to monitor changes in the internal state of instructions | ||
| /// that are dispatched by the DispatchUnit to the hardware schedulers. | ||
| class Instruction { | ||
| const InstrDesc &Desc; | ||
|
|
||
| enum InstrStage { | ||
| IS_INVALID, // Instruction in an invalid state. | ||
| IS_AVAILABLE, // Instruction dispatched but operands are not ready. | ||
| IS_READY, // Instruction dispatched and operands ready. | ||
| IS_EXECUTING, // Instruction issued. | ||
| IS_EXECUTED, // Instruction executed. Values are written back. | ||
| IS_RETIRED // Instruction retired. | ||
| }; | ||
|
|
||
| // The current instruction stage. | ||
| enum InstrStage Stage; | ||
|
|
||
| // This value defaults to the instruction latency. This instruction is | ||
| // considered executed when field CyclesLeft goes to zero. | ||
| int CyclesLeft; | ||
|
|
||
| // Retire Unit token ID for this instruction. | ||
| unsigned RCUTokenID; | ||
|
|
||
| using UniqueDef = std::unique_ptr<WriteState>; | ||
| using UniqueUse = std::unique_ptr<ReadState>; | ||
| using VecDefs = std::vector<UniqueDef>; | ||
| using VecUses = std::vector<UniqueUse>; | ||
|
|
||
| // Output dependencies. | ||
| // One entry per each implicit and explicit register definition. | ||
| VecDefs Defs; | ||
|
|
||
| // Input dependencies. | ||
| // One entry per each implicit and explicit register use. | ||
| VecUses Uses; | ||
|
|
||
| // This instruction has already been dispatched, and all operands are ready. | ||
| void setReady() { | ||
| assert(Stage == IS_AVAILABLE); | ||
| Stage = IS_READY; | ||
| } | ||
|
|
||
| public: | ||
| Instruction(const InstrDesc &D) | ||
| : Desc(D), Stage(IS_INVALID), CyclesLeft(-1) {} | ||
| Instruction(const Instruction &Other) = delete; | ||
| Instruction &operator=(const Instruction &Other) = delete; | ||
|
|
||
| VecDefs &getDefs() { return Defs; } | ||
| const VecDefs &getDefs() const { return Defs; } | ||
| VecUses &getUses() { return Uses; } | ||
| const VecUses &getUses() const { return Uses; } | ||
| const InstrDesc &getDesc() const { return Desc; } | ||
|
|
||
| unsigned getRCUTokenID() const { return RCUTokenID; } | ||
| int getCyclesLeft() const { return CyclesLeft; } | ||
| void setCyclesLeft(int Cycles) { CyclesLeft = Cycles; } | ||
| void setRCUTokenID(unsigned TokenID) { RCUTokenID = TokenID; } | ||
|
|
||
| // Transition to the dispatch stage. | ||
| // No definition is updated because the instruction is not "executing". | ||
| void dispatch() { | ||
| assert(Stage == IS_INVALID); | ||
| Stage = IS_AVAILABLE; | ||
| } | ||
|
|
||
| // Instruction issued. Transition to the IS_EXECUTING state, and update | ||
| // all the definitions. | ||
| void execute(); | ||
|
|
||
| void forceExecuted() { | ||
| assert((Stage == IS_INVALID && isZeroLatency()) || | ||
| (Stage == IS_READY && Desc.MaxLatency == 0)); | ||
| Stage = IS_EXECUTED; | ||
| } | ||
|
|
||
| // Checks if operands are available. If all operands area ready, | ||
| // then this forces a transition from IS_AVAILABLE to IS_READY. | ||
| bool isReady(); | ||
|
|
||
| bool isDispatched() const { return Stage == IS_AVAILABLE; } | ||
| bool isExecuting() const { return Stage == IS_EXECUTING; } | ||
| bool isExecuted() const { return Stage == IS_EXECUTED; } | ||
| bool isZeroLatency() const; | ||
|
|
||
| void retire() { | ||
| assert(Stage == IS_EXECUTED); | ||
| Stage = IS_RETIRED; | ||
| } | ||
|
|
||
| void cycleEvent(); | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| ;===- ./tools/llvm-mca/LLVMBuild.txt ---------------------------*- Conf -*--===; | ||
| ; | ||
| ; The LLVM Compiler Infrastructure | ||
| ; | ||
| ; This file is distributed under the University of Illinois Open Source | ||
| ; License. See LICENSE.TXT for details. | ||
| ; | ||
| ;===------------------------------------------------------------------------===; | ||
| ; | ||
| ; This is an LLVMBuild description file for the components in this subdirectory. | ||
| ; | ||
| ; For more information on the LLVMBuild system, please see: | ||
| ; | ||
| ; http://llvm.org/docs/LLVMBuild.html | ||
| ; | ||
| ;===------------------------------------------------------------------------===; | ||
|
|
||
| [component_0] | ||
| type = Tool | ||
| name = llvm-mca | ||
| parent = Tools | ||
| required_libraries = MC MCParser Support all-targets |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| //===----------------------- LSUnit.cpp --------------------------*- C++-*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// A Load-Store Unit for the llvm-mca tool. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "LSUnit.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| #define DEBUG_TYPE "llvm-mca" | ||
|
|
||
| namespace mca { | ||
|
|
||
| #ifndef NDEBUG | ||
| void LSUnit::dump() const { | ||
| dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; | ||
| dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; | ||
| dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; | ||
| dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; | ||
| } | ||
| #endif | ||
|
|
||
| void LSUnit::assignLQSlot(unsigned Index) { | ||
| assert(!isLQFull()); | ||
| assert(LoadQueue.count(Index) == 0); | ||
|
|
||
| DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index | ||
| << ",slot=" << LoadQueue.size() << ">\n"); | ||
| LoadQueue.insert(Index); | ||
| } | ||
|
|
||
| void LSUnit::assignSQSlot(unsigned Index) { | ||
| assert(!isSQFull()); | ||
| assert(StoreQueue.count(Index) == 0); | ||
|
|
||
| DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index | ||
| << ",slot=" << StoreQueue.size() << ">\n"); | ||
| StoreQueue.insert(Index); | ||
| } | ||
|
|
||
| bool LSUnit::isReady(unsigned Index) const { | ||
| bool IsALoad = LoadQueue.count(Index) != 0; | ||
| bool IsAStore = StoreQueue.count(Index) != 0; | ||
| unsigned LoadBarrierIndex = LoadBarriers.empty() ? 0 : *LoadBarriers.begin(); | ||
| unsigned StoreBarrierIndex = StoreBarriers.empty() ? 0 : *StoreBarriers.begin(); | ||
|
|
||
| if (IsALoad && LoadBarrierIndex) { | ||
| if (Index > LoadBarrierIndex) | ||
| return false; | ||
| if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) | ||
| return false; | ||
| } | ||
|
|
||
| if (IsAStore && StoreBarrierIndex) { | ||
| if (Index > StoreBarrierIndex) | ||
| return false; | ||
| if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) | ||
| return false; | ||
| } | ||
|
|
||
| if (NoAlias && IsALoad) | ||
| return true; | ||
|
|
||
| if (StoreQueue.size()) { | ||
| // Check if this memory operation is younger than the older store. | ||
| if (Index > *StoreQueue.begin()) | ||
| return false; | ||
| } | ||
|
|
||
| // Okay, we are older than the oldest store in the queue. | ||
| // If there are no pending loads, then we can say for sure that this | ||
| // instruction is ready. | ||
| if (isLQEmpty()) | ||
| return true; | ||
|
|
||
| // Check if there are no older loads. | ||
| if (Index <= *LoadQueue.begin()) | ||
| return true; | ||
|
|
||
| // There is at least one younger load. | ||
| return !IsAStore; | ||
| } | ||
|
|
||
| void LSUnit::onInstructionExecuted(unsigned Index) { | ||
| std::set<unsigned>::iterator it = LoadQueue.find(Index); | ||
| if (it != LoadQueue.end()) { | ||
| DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index | ||
| << " has been removed from the load queue.\n"); | ||
| LoadQueue.erase(it); | ||
| } | ||
|
|
||
| it = StoreQueue.find(Index); | ||
| if (it != StoreQueue.end()) { | ||
| DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index | ||
| << " has been removed from the store queue.\n"); | ||
| StoreQueue.erase(it); | ||
| } | ||
|
|
||
| if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) | ||
| StoreBarriers.erase(StoreBarriers.begin()); | ||
| if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) | ||
| LoadBarriers.erase(LoadBarriers.begin()); | ||
| } | ||
| } // namespace mca |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,160 @@ | ||
| //===------------------------- LSUnit.h --------------------------*- C++-*-===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| /// \file | ||
| /// | ||
| /// A Load/Store unit class that models load/store queues and that implements | ||
| /// a simple weak memory consistency model. | ||
| /// | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H | ||
| #define LLVM_TOOLS_LLVM_MCA_LSUNIT_H | ||
|
|
||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
| #include <set> | ||
|
|
||
| #define DEBUG_TYPE "llvm-mca" | ||
|
|
||
| namespace mca { | ||
|
|
||
| /// \brief A Load/Store Unit implementing load and store queues. | ||
| /// | ||
| /// This class implements a load queue and a store queue to emulate the | ||
| /// out-of-order execution of memory operations. | ||
| /// Each load (or store) consumes an entry in the load (or store) queue. | ||
| /// | ||
| /// Rules are: | ||
| /// 1) A younger load is allowed to pass an older load only if there are no | ||
| /// stores nor barriers in between the two loads. | ||
| /// 2) A younger store is not allowed to pass an older store. | ||
| /// 3) A younger store is not allowed to pass an older load. | ||
| /// 4) A younger load is allowed to pass an older store only if the load does | ||
| /// not alias with the store. | ||
| /// | ||
| /// This class optimistically assumes that loads don't alias store operations. | ||
| /// Under this assumption, younger loads are always allowed to pass older | ||
| /// stores (this only affects rule 4). | ||
| /// Essentially, this LSUnit doesn't attempt to run any sort of alias analysis | ||
| /// to predict when loads and stores don't alias with each other. | ||
| /// | ||
| /// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be | ||
| /// set to `false` by the constructor of LSUnit. | ||
| /// | ||
| /// In the case of write-combining memory, rule 2. could be relaxed to allow | ||
| /// reordering of non-aliasing store operations. At the moment, this is not | ||
| /// allowed. | ||
| /// To put it in another way, there is no option to specify a different memory | ||
| /// type for memory operations (example: write-through, write-combining, etc.). | ||
| /// Also, there is no way to weaken the memory model, and this unit currently | ||
| /// doesn't support write-combining behavior. | ||
| /// | ||
| /// No assumptions are made on the size of the store buffer. | ||
| /// As mentioned before, this class doesn't perform alias analysis. | ||
| /// Consequently, LSUnit doesn't know how to identify cases where | ||
| /// store-to-load forwarding may occur. | ||
| /// | ||
| /// LSUnit doesn't attempt to predict whether a load or store hits or misses | ||
| /// the L1 cache. To be more specific, LSUnit doesn't know anything about | ||
| /// the cache hierarchy and memory types. | ||
| /// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the | ||
| /// scheduling model provides an "optimistic" load-to-use latency (which usually | ||
| /// matches the load-to-use latency for when there is a hit in the L1D). | ||
| /// | ||
| /// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor | ||
| /// memory-barrier like instructions. | ||
| /// LSUnit conservatively assumes that an instruction which `mayLoad` and has | ||
| /// `unmodeled side effects` behave like a "soft" load-barrier. That means, it | ||
| /// serializes loads without forcing a flush of the load queue. | ||
| /// Similarly, instructions that both `mayStore` and have `unmodeled side | ||
| /// effects` are treated like store barriers. A full memory | ||
| /// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side | ||
| /// effects. This is obviously inaccurate, but this is the best that we can do | ||
| /// at the moment. | ||
| /// | ||
| /// Each load/store barrier consumes one entry in the load/store queue. A | ||
| /// load/store barrier enforces ordering of loads/stores: | ||
| /// - A younger load cannot pass a load barrier. | ||
| /// - A younger store cannot pass a store barrier. | ||
| /// | ||
| /// A younger load has to wait for the memory load barrier to execute. | ||
| /// A load/store barrier is "executed" when it becomes the oldest entry in | ||
| /// the load/store queue(s). That also means, all the older loads/stores have | ||
| /// already been executed. | ||
| class LSUnit { | ||
| // Load queue size. | ||
| // LQ_Size == 0 means that there are infinite slots in the load queue. | ||
| unsigned LQ_Size; | ||
|
|
||
| // Store queue size. | ||
| // SQ_Size == 0 means that there are infinite slots in the store queue. | ||
| unsigned SQ_Size; | ||
|
|
||
| // If true, loads will never alias with stores. This is the default. | ||
| bool NoAlias; | ||
|
|
||
| std::set<unsigned> LoadQueue; | ||
| std::set<unsigned> StoreQueue; | ||
|
|
||
| void assignLQSlot(unsigned Index); | ||
| void assignSQSlot(unsigned Index); | ||
| bool isReadyNoAlias(unsigned Index) const; | ||
|
|
||
| // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is | ||
| // conservatively treated as a store barrier. It forces older store to be | ||
| // executed before newer stores are issued. | ||
| std::set<unsigned> StoreBarriers; | ||
|
|
||
| // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is | ||
| // conservatively treated as a load barrier. It forces older loads to execute | ||
| // before newer loads are issued. | ||
| std::set<unsigned> LoadBarriers; | ||
|
|
||
| public: | ||
| LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false) | ||
| : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {} | ||
|
|
||
| #ifndef NDEBUG | ||
| void dump() const; | ||
| #endif | ||
|
|
||
| bool isSQEmpty() const { return StoreQueue.empty(); } | ||
| bool isLQEmpty() const { return LoadQueue.empty(); } | ||
| bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } | ||
| bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } | ||
|
|
||
| void reserve(unsigned Index, bool MayLoad, bool MayStore, bool IsMemBarrier) { | ||
| if (!MayLoad && !MayStore) | ||
| return; | ||
| if (MayLoad) { | ||
| if (IsMemBarrier) | ||
| LoadBarriers.insert(Index); | ||
| assignLQSlot(Index); | ||
| } | ||
| if (MayStore) { | ||
| if (IsMemBarrier) | ||
| StoreBarriers.insert(Index); | ||
| assignSQSlot(Index); | ||
| } | ||
| } | ||
|
|
||
| // The rules are: | ||
| // 1. A store may not pass a previous store. | ||
| // 2. A load may not pass a previous store unless flag 'NoAlias' is set. | ||
| // 3. A load may pass a previous load. | ||
| // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). | ||
| // 5. A load has to wait until an older load barrier is fully executed. | ||
| // 6. A store has to wait until an older store barrier is fully executed. | ||
| bool isReady(unsigned Index) const; | ||
| void onInstructionExecuted(unsigned Index); | ||
| }; | ||
|
|
||
| } // namespace mca | ||
|
|
||
| #endif |