diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 27d52c16a4f39..575bd4c9d3561 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -44,6 +44,7 @@ include "RISCVSchedRocket.td" include "RISCVSchedSiFive7.td" include "RISCVSchedSiFiveP400.td" include "RISCVSchedSyntacoreSCR1.td" +include "RISCVSchedXiangShanNanHu.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 59bb811058d48..8c75df41f5e39 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -330,7 +330,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", TuneLDADDFusion]>; def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", - NoSchedModel, + XiangShanNanHuModel, [Feature64Bit, FeatureStdExtZicsr, FeatureStdExtZifencei, @@ -348,4 +348,8 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", FeatureStdExtZksh, FeatureStdExtSvinval, FeatureStdExtZicbom, - FeatureStdExtZicboz]>; + FeatureStdExtZicboz], + [TuneNoDefaultUnroll, + TuneZExtHFusion, + TuneZExtWFusion, + TuneShiftedZExtWFusion]>; diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td new file mode 100644 index 0000000000000..667b5983cb401 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -0,0 +1,308 @@ +//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// XiangShan is a high-performance open-source RISC-V processor developed by +// the Institute of Computing Technology (ICT), Chinese Academy of Sciences. +// Source: https://github.com/OpenXiangShan/XiangShan +// Documentation: https://github.com/OpenXiangShan/XiangShan-doc + +// XiangShan-NanHu is the second generation of XiangShan processor series. +// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/ + +def XiangShanNanHuModel : SchedMachineModel { + let MicroOpBufferSize = 256; + let LoopMicroOpBufferSize = 48; // Instruction queue size + let IssueWidth = 6; // 6-way decode and dispatch + let LoadLatency = 4; + let MispredictPenalty = 11; // Based on estimate of pipeline depth. + let CompleteModel = 0; + let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions, + HasVInstructionsI64]; +} + +let SchedModel = XiangShanNanHuModel in { + +// The reservation stations are distributed and grouped as 32-entry or 16-entry smaller ones. +let BufferSize = 16 in { + def XS2ALU : ProcResource<4>; + def XS2MDU : ProcResource<2>; + def XS2MISC : ProcResource<1>; + + def XS2FMAC : ProcResource<4>; + def XS2FMISC : ProcResource<2>; + + // Load/Store queues are ignored. + def XS2LD : ProcResource<2>; + def XS2ST : ProcResource<2>; +} + +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer arithmetic and logic +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Integer multiplication +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +} + +// Integer division +// SRT16 algorithm +let Latency = 20, ReleaseAtCycles = [20] in { +def : WriteRes; +def : WriteRes; +} + +// Zb* +let Latency = 1 in { +// Zba +def : WriteRes; +def : WriteRes; + +// Zbb +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Zbkb +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Zbs +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 3 in { +// Zbb +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Zbkc +def : WriteRes; + +// Zbkx +def : WriteRes; +} + +// Memory +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 5 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat. +// Documentation: https://github.com/OpenXiangShan/fudian + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// FP multiplication +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 5 in { +def : WriteRes; +def : WriteRes; +} + +// FP division +def : WriteRes { + let Latency = 11; +} +def : WriteRes { + let Latency = 18; +} + +def : WriteRes { + let Latency = 17; +} +def : WriteRes { + let Latency = 31; +} + +// Others +def : WriteRes; +def : WriteRes; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +// Bypass and advance + +class XS2LoadToALUBypass + : ReadAdvance; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; // Cascade FMA +def : ReadAdvance; +def : ReadAdvance; // Cascade FMA +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zb* +// Zba +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +// Zbb +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +// Zbkc +def : ReadAdvance; +// Zbs +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +// Zbkb +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +def : XS2LoadToALUBypass; +// Zbkx +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZfh; +defm : UnsupportedSchedSFB; +defm : UnsupportedSchedZabha; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s new file mode 100644 index 0000000000000..d44eb55ebf759 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s @@ -0,0 +1,53 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu < %s | FileCheck %s + +# Test XiangShan FuDian's cascade FMA, CPI = 3 +fmadd.s fa0, fa1, fa2, fa0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 100 +# CHECK-NEXT: Total Cycles: 305 +# CHECK-NEXT: Total uOps: 100 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 0.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.25 fmadd.s fa0, fa1, fa2, fa0 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - XS2ALU +# CHECK-NEXT: [0.1] - XS2ALU +# CHECK-NEXT: [0.2] - XS2ALU +# CHECK-NEXT: [0.3] - XS2ALU +# CHECK-NEXT: [1.0] - XS2FMAC +# CHECK-NEXT: [1.1] - XS2FMAC +# CHECK-NEXT: [1.2] - XS2FMAC +# CHECK-NEXT: [1.3] - XS2FMAC +# CHECK-NEXT: [2.0] - XS2FMISC +# CHECK-NEXT: [2.1] - XS2FMISC +# CHECK-NEXT: [3.0] - XS2LD +# CHECK-NEXT: [3.1] - XS2LD +# CHECK-NEXT: [4.0] - XS2MDU +# CHECK-NEXT: [4.1] - XS2MDU +# CHECK-NEXT: [5] - XS2MISC +# CHECK-NEXT: [6.0] - XS2ST +# CHECK-NEXT: [6.1] - XS2ST + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] +# CHECK-NEXT: - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - fmadd.s fa0, fa1, fa2, fa0 diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s new file mode 100644 index 0000000000000..677fece1535a0 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s @@ -0,0 +1,527 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline \ +# RUN: -timeline-max-cycles=1000 -iterations=1 < %s | FileCheck %s + +lui a0, 1 +auipc a1, 1 +add a0, a0, a1 +addi a0, a0, 1 +addw a0, a0, a0 +addiw a0, a0, 1 +sub a0, a0, a0 +subw a0, a0, a0 +and a0, a0, a0 +andi a0, a0, 1 +or a0, a0, a0 +ori a0, a0, 1 +xor a0, a0, a0 +xori a0, a0, 1 +sll a0, a0, a0 +slli a0, a0, 1 +sllw a0, a0, a0 +slliw a0, a0, 1 +srl a0, a0, a0 +srli a0, a0, 1 +srlw a0, a0, a0 +srliw a0, a0, 1 +sra a0, a0, a0 +srai a0, a0, 1 +sraw a0, a0, a0 +sraiw a0, a0, 1 +slt a0, a0, a0 +slti a0, a0, 1 +sltu a0, a0, a0 +sltiu a0, a0, 1 +mul a0, a0, a0 +add a0, a0, a0 +mulw a0, a0, a0 +add a0, a0, a0 +beq a0, a0, 1f +1: +add a0, a0, a0 +bne a0, a0, 1f +1: +add a0, a0, a0 +blt a0, a0, 1f +1: +add a0, a0, a0 +bltu a0, a0, 1f +1: +add a0, a0, a0 +bge a0, a0, 1f +1: +add a0, a0, a0 +bgeu a0, a0, 1f +1: +# zba +add.uw a0, a0, a0 +slli.uw a0, a0, 1 +sh1add.uw a0, a0, a0 +sh2add.uw a0, a0, a0 +sh3add.uw a0, a0, a0 +sh1add a0, a0, a0 +sh2add a0, a0, a0 +sh3add a0, a0, a0 +# zbb +andn a0, a0, a0 +orn a0, a0, a0 +xnor a0, a0, a0 +sext.b a0, a0 +sext.h a0, a0 +zext.h a0, a0 +min a0, a0, a0 +minu a0, a0, a0 +max a0, a0, a0 +maxu a0, a0, a0 +rol a0, a0, a0 +ror a0, a0, a0 +rori a0, a0, 1 +clz a0, a0 +clzw a0, a0 +ctz a0, a0 +ctzw a0, a0 +cpop a0, a0 +add a0, a0, a0 +cpopw a0, a0 +add a0, a0, a0 +rev8 a0, a0 +orc.b a0, a0 +lb a0, 0(a0) +add a0, a0, a0 +lh a0, 0(a0) +and a0, a0, a0 +lw a0, 0(a0) +or a0, a0, a0 +ld a0, 0(a0) +xor a0, a0, a0 +lbu a0, 0(a0) +addi a0, a0, 1 +lhu a0, 0(a0) +sub a0, a0, a0 +lwu a0, 0(a0) +addw a0, a0, a0 +jr a0 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 91 +# CHECK-NEXT: Total Cycles: 124 +# CHECK-NEXT: Total uOps: 91 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.73 +# CHECK-NEXT: IPC: 0.73 +# CHECK-NEXT: Block RThroughput: 17.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 lui a0, 1 +# CHECK-NEXT: 1 1 0.25 auipc a1, 1 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a1 +# CHECK-NEXT: 1 1 0.25 addi a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 addw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 addiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sub a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 subw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 and a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 andi a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 or a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 ori a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 xor a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 xori a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sll a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slli a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sllw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 srl a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 srli a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 srlw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 srliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sra a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 srai a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sraw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sraiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 slt a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slti a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sltu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 seqz a0, a0 +# CHECK-NEXT: 1 3 0.50 mul a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 3 0.50 mulw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 beq a0, a0, .Ltmp0 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 bne a0, a0, .Ltmp1 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 blt a0, a0, .Ltmp2 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 bltu a0, a0, .Ltmp3 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 bge a0, a0, .Ltmp4 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 bgeu a0, a0, .Ltmp5 +# CHECK-NEXT: 1 1 0.25 add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slli.uw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh2add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh3add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh1add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh2add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh3add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 andn a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 orn a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 xnor a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sext.b a0, a0 +# CHECK-NEXT: 1 1 0.25 sext.h a0, a0 +# CHECK-NEXT: 1 1 0.25 zext.h a0, a0 +# CHECK-NEXT: 1 1 0.25 min a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 minu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 max a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 maxu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rol a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 ror a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rori a0, a0, 1 +# CHECK-NEXT: 1 3 0.50 clz a0, a0 +# CHECK-NEXT: 1 3 0.50 clzw a0, a0 +# CHECK-NEXT: 1 3 0.50 ctz a0, a0 +# CHECK-NEXT: 1 3 0.50 ctzw a0, a0 +# CHECK-NEXT: 1 3 0.50 cpop a0, a0 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 3 0.50 cpopw a0, a0 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rev8 a0, a0 +# CHECK-NEXT: 1 1 0.25 orc.b a0, a0 +# CHECK-NEXT: 1 5 0.50 * lb a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * lh a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 and a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * lw a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 or a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * ld a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 xor a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * lbu a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 addi a0, a0, 1 +# CHECK-NEXT: 1 5 0.50 * lhu a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 sub a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * lwu a0, 0(a0) +# CHECK-NEXT: 1 1 0.25 addw a0, a0, a0 +# CHECK-NEXT: 1 1 1.00 jr a0 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - XS2ALU +# CHECK-NEXT: [0.1] - XS2ALU +# CHECK-NEXT: [0.2] - XS2ALU +# CHECK-NEXT: [0.3] - XS2ALU +# CHECK-NEXT: [1.0] - XS2FMAC +# CHECK-NEXT: [1.1] - XS2FMAC +# CHECK-NEXT: [1.2] - XS2FMAC +# CHECK-NEXT: [1.3] - XS2FMAC +# CHECK-NEXT: [2.0] - XS2FMISC +# CHECK-NEXT: [2.1] - XS2FMISC +# CHECK-NEXT: [3.0] - XS2LD +# CHECK-NEXT: [3.1] - XS2LD +# CHECK-NEXT: [4.0] - XS2MDU +# CHECK-NEXT: [4.1] - XS2MDU +# CHECK-NEXT: [5] - XS2MISC +# CHECK-NEXT: [6.0] - XS2ST +# CHECK-NEXT: [6.1] - XS2ST + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] +# CHECK-NEXT: 17.00 17.00 17.00 18.00 - - - - - - 3.00 4.00 4.00 4.00 7.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - lui a0, 1 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - auipc a1, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a1 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - addi a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - addiw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sub a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - subw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - and a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - andi a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - or a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - ori a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - xor a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - xori a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sll a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - slli a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sllw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - slliw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - srl a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - srli a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - srlw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - srliw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sra a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - srai a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sraw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sraiw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - slt a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - slti a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sltu a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - seqz a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - mul a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - mulw a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - beq a0, a0, .Ltmp0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bne a0, a0, .Ltmp1 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - blt a0, a0, .Ltmp2 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bltu a0, a0, .Ltmp3 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bge a0, a0, .Ltmp4 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bgeu a0, a0, .Ltmp5 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - slli.uw a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sh2add.uw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sh3add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sh1add a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sh2add a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sh3add a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - andn a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - orn a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - xnor a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sext.b a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sext.h a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - zext.h a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - min a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - minu a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - max a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - maxu a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - rol a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - ror a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - rori a0, a0, 1 +# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - clz a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - clzw a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - ctz a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - ctzw a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - cpop a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - cpopw a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - rev8 a0, a0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - orc.b a0, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lb a0, 0(a0) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - lh a0, 0(a0) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - and a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lw a0, 0(a0) +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - or a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - ld a0, 0(a0) +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - xor a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lbu a0, 0(a0) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - addi a0, a0, 1 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - lhu a0, 0(a0) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sub a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lwu a0, 0(a0) +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addw a0, a0, a0 +# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - jr a0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123 + +# CHECK: [0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . lui a0, 1 +# CHECK-NEXT: [0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . auipc a1, 1 +# CHECK-NEXT: [0,2] D=eER. . . . . . . . . . . . . . . . . . . . . . . . . add a0, a0, a1 +# CHECK-NEXT: [0,3] D==eER . . . . . . . . . . . . . . . . . . . . . . . . addi a0, a0, 1 +# CHECK-NEXT: [0,4] D===eER . . . . . . . . . . . . . . . . . . . . . . . . addw a0, a0, a0 +# CHECK-NEXT: [0,5] D====eER . . . . . . . . . . . . . . . . . . . . . . . . addiw a0, a0, 1 +# CHECK-NEXT: [0,6] .D====eER . . . . . . . . . . . . . . . . . . . . . . . . sub a0, a0, a0 +# CHECK-NEXT: [0,7] .D=====eER. . . . . . . . . . . . . . . . . . . . . . . . subw a0, a0, a0 +# CHECK-NEXT: [0,8] .D======eER . . . . . . . . . . . . . . . . . . . . . . . and a0, a0, a0 +# CHECK-NEXT: [0,9] .D=======eER . . . . . . . . . . . . . . . . . . . . . . . andi a0, a0, 1 +# CHECK-NEXT: [0,10] .D========eER . . . . . . . . . . . . . . . . . . . . . . . or a0, a0, a0 +# CHECK-NEXT: [0,11] .D=========eER . . . . . . . . . . . . . . . . . . . . . . . ori a0, a0, 1 +# CHECK-NEXT: [0,12] . D=========eER. . . . . . . . . . . . . . . . . . . . . . . xor a0, a0, a0 +# CHECK-NEXT: [0,13] . D==========eER . . . . . . . . . . . . . . . . . . . . . . xori a0, a0, 1 +# CHECK-NEXT: [0,14] . D===========eER . . . . . . . . . . . . . . . . . . . . . . sll a0, a0, a0 +# CHECK-NEXT: [0,15] . D============eER . . . . . . . . . . . . . . . . . . . . . . slli a0, a0, 1 +# CHECK-NEXT: [0,16] . D=============eER . . . . . . . . . . . . . . . . . . . . . . sllw a0, a0, a0 +# CHECK-NEXT: [0,17] . D==============eER. . . . . . . . . . . . . . . . . . . . . . slliw a0, a0, 1 +# CHECK-NEXT: [0,18] . D==============eER . . . . . . . . . . . . . . . . . . . . . srl a0, a0, a0 +# CHECK-NEXT: [0,19] . D===============eER . . . . . . . . . . . . . . . . . . . . . srli a0, a0, 1 +# CHECK-NEXT: [0,20] . D===============eER . . . . . . . . . . . . . . . . . . . . . srlw a0, a0, a0 +# CHECK-NEXT: [0,21] . D===============eER . . . . . . . . . . . . . . . . . . . . . srliw a0, a0, 1 +# CHECK-NEXT: [0,22] . .D===============eER. . . . . . . . . . . . . . . . . . . . . sra a0, a0, a0 +# CHECK-NEXT: [0,23] . . D===============eER . . . . . . . . . . . . . . . . . . . . srai a0, a0, 1 +# CHECK-NEXT: [0,24] . . D===============eER . . . . . . . . . . . . . . . . . . . . sraw a0, a0, a0 +# CHECK-NEXT: [0,25] . . D===============eER . . . . . . . . . . . . . . . . . . . . sraiw a0, a0, 1 +# CHECK-NEXT: [0,26] . . D===============eER . . . . . . . . . . . . . . . . . . . . slt a0, a0, a0 +# CHECK-NEXT: [0,27] . . .D===============eER. . . . . . . . . . . . . . . . . . . . slti a0, a0, 1 +# CHECK-NEXT: [0,28] . . . D===============eER . . . . . . . . . . . . . . . . . . . sltu a0, a0, a0 +# CHECK-NEXT: [0,29] . . . D===============eER . . . . . . . . . . . . . . . . . . . seqz a0, a0 +# CHECK-NEXT: [0,30] . . . D================eeeER. . . . . . . . . . . . . . . . . . . mul a0, a0, a0 +# CHECK-NEXT: [0,31] . . . D==================eER . . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,32] . . . D===================eeeER . . . . . . . . . . . . . . . . . . mulw a0, a0, a0 +# CHECK-NEXT: [0,33] . . . D=====================eER. . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,34] . . . D======================eER . . . . . . . . . . . . . . . . . beq a0, a0, .Ltmp0 +# CHECK-NEXT: [0,35] . . . .D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,36] . . . .D======================eER . . . . . . . . . . . . . . . . . bne a0, a0, .Ltmp1 +# CHECK-NEXT: [0,37] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,38] . . . . D======================eER . . . . . . . . . . . . . . . . . blt a0, a0, .Ltmp2 +# CHECK-NEXT: [0,39] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,40] . . . . D======================eER . . . . . . . . . . . . . . . . . bltu a0, a0, .Ltmp3 +# CHECK-NEXT: [0,41] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,42] . . . . D======================eER. . . . . . . . . . . . . . . . . bge a0, a0, .Ltmp4 +# CHECK-NEXT: [0,43] . . . . D=====================eER. . . . . . . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,44] . . . . D======================eER . . . . . . . . . . . . . . . . bgeu a0, a0, .Ltmp5 +# CHECK-NEXT: [0,45] . . . . .D=====================eER . . . . . . . . . . . . . . . . add.uw a0, a0, a0 +# CHECK-NEXT: [0,46] . . . . . D=====================eER . . . . . . . . . . . . . . . . slli.uw a0, a0, 1 +# CHECK-NEXT: [0,47] . . . . . D=====================eER . . . . . . . . . . . . . . . . sh1add.uw a0, a0, a0 +# CHECK-NEXT: [0,48] . . . . . D=====================eER . . . . . . . . . . . . . . . . sh2add.uw a0, a0, a0 +# CHECK-NEXT: [0,49] . . . . . D=====================eER. . . . . . . . . . . . . . . . sh3add.uw a0, a0, a0 +# CHECK-NEXT: [0,50] . . . . . .D=====================eER . . . . . . . . . . . . . . . sh1add a0, a0, a0 +# CHECK-NEXT: [0,51] . . . . . . D=====================eER . . . . . . . . . . . . . . . sh2add a0, a0, a0 +# CHECK-NEXT: [0,52] . . . . . . D=====================eER . . . . . . . . . . . . . . . sh3add a0, a0, a0 +# CHECK-NEXT: [0,53] . . . . . . D=====================eER . . . . . . . . . . . . . . . andn a0, a0, a0 +# CHECK-NEXT: [0,54] . . . . . . . D==================eER. . . . . . . . . . . . . . . orn a0, a0, a0 +# CHECK-NEXT: [0,55] . . . . . . . . D===============eER . . . . . . . . . . . . . . xnor a0, a0, a0 +# CHECK-NEXT: [0,56] . . . . . . . . D===============eER . . . . . . . . . . . . . . sext.b a0, a0 +# CHECK-NEXT: [0,57] . . . . . . . . D===============eER . . . . . . . . . . . . . . sext.h a0, a0 +# CHECK-NEXT: [0,58] . . . . . . . . D===============eER . . . . . . . . . . . . . . zext.h a0, a0 +# CHECK-NEXT: [0,59] . . . . . . . . .D===============eER. . . . . . . . . . . . . . min a0, a0, a0 +# CHECK-NEXT: [0,60] . . . . . . . . . D===============eER . . . . . . . . . . . . . minu a0, a0, a0 +# CHECK-NEXT: [0,61] . . . . . . . . . D===============eER . . . . . . . . . . . . . max a0, a0, a0 +# CHECK-NEXT: [0,62] . . . . . . . . . D===============eER . . . . . . . . . . . . . maxu a0, a0, a0 +# CHECK-NEXT: [0,63] . . . . . . . . . D===============eER . . . . . . . . . . . . . rol a0, a0, a0 +# CHECK-NEXT: [0,64] . . . . . . . . . .D===============eER. . . . . . . . . . . . . ror a0, a0, a0 +# CHECK-NEXT: [0,65] . . . . . . . . . . D===============eER . . . . . . . . . . . . rori a0, a0, 1 +# CHECK-NEXT: [0,66] . . . . . . . . . . D================eeeER . . . . . . . . . . . . clz a0, a0 +# CHECK-NEXT: [0,67] . . . . . . . . . . D===================eeeER . . . . . . . . . . . clzw a0, a0 +# CHECK-NEXT: [0,68] . . . . . . . . . . D======================eeeER. . . . . . . . . . . ctz a0, a0 +# CHECK-NEXT: [0,69] . . . . . . . . . . D=========================eeeER . . . . . . . . . . ctzw a0, a0 +# CHECK-NEXT: [0,70] . . . . . . . . . . D============================eeeER . . . . . . . . . cpop a0, a0 +# CHECK-NEXT: [0,71] . . . . . . . . . . D==============================eER . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,72] . . . . . . . . . . D===============================eeeER. . . . . . . . . cpopw a0, a0 +# CHECK-NEXT: [0,73] . . . . . . . . . . D=================================eER . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,74] . . . . . . . . . . D=================================eER . . . . . . . . rev8 a0, a0 +# CHECK-NEXT: [0,75] . . . . . . . . . . .D=================================eER . . . . . . . . orc.b a0, a0 +# CHECK-NEXT: [0,76] . . . . . . . . . . .D==================================eeeeeER . . . . . . . lb a0, 0(a0) +# CHECK-NEXT: [0,77] . . . . . . . . . . . D=====================================eER . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,78] . . . . . . . . . . . D======================================eeeeeER . . . . . . lh a0, 0(a0) +# CHECK-NEXT: [0,79] . . . . . . . . . . . D=========================================eER . . . . . . and a0, a0, a0 +# CHECK-NEXT: [0,80] . . . . . . . . . . . D==========================================eeeeeER . . . . . lw a0, 0(a0) +# CHECK-NEXT: [0,81] . . . . . . . . . . . D=============================================eER . . . . . or a0, a0, a0 +# CHECK-NEXT: [0,82] . . . . . . . . . . . D==============================================eeeeeER . . . . ld a0, 0(a0) +# CHECK-NEXT: [0,83] . . . . . . . . . . . D=================================================eER . . . . xor a0, a0, a0 +# CHECK-NEXT: [0,84] . . . . . . . . . . . D==================================================eeeeeER . . . lbu a0, 0(a0) +# CHECK-NEXT: [0,85] . . . . . . . . . . . .D=====================================================eER . . . addi a0, a0, 1 +# CHECK-NEXT: [0,86] . . . . . . . . . . . .D======================================================eeeeeER . . lhu a0, 0(a0) +# CHECK-NEXT: [0,87] . . . . . . . . . . . . D=========================================================eER . . sub a0, a0, a0 +# CHECK-NEXT: [0,88] . . . . . . . . . . . . D==========================================================eeeeeER. lwu a0, 0(a0) +# CHECK-NEXT: [0,89] . . . . . . . . . . . . D=============================================================eER. addw a0, a0, a0 +# CHECK-NEXT: [0,90] . . . . . . . . . . . . D==============================================================eER jr a0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 lui a0, 1 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 auipc a1, 1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 add a0, a0, a1 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 addi a0, a0, 1 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 addw a0, a0, a0 +# CHECK-NEXT: 5. 1 5.0 0.0 0.0 addiw a0, a0, 1 +# CHECK-NEXT: 6. 1 5.0 0.0 0.0 sub a0, a0, a0 +# CHECK-NEXT: 7. 1 6.0 0.0 0.0 subw a0, a0, a0 +# CHECK-NEXT: 8. 1 7.0 0.0 0.0 and a0, a0, a0 +# CHECK-NEXT: 9. 1 8.0 0.0 0.0 andi a0, a0, 1 +# CHECK-NEXT: 10. 1 9.0 0.0 0.0 or a0, a0, a0 +# CHECK-NEXT: 11. 1 10.0 0.0 0.0 ori a0, a0, 1 +# CHECK-NEXT: 12. 1 10.0 0.0 0.0 xor a0, a0, a0 +# CHECK-NEXT: 13. 1 11.0 0.0 0.0 xori a0, a0, 1 +# CHECK-NEXT: 14. 1 12.0 0.0 0.0 sll a0, a0, a0 +# CHECK-NEXT: 15. 1 13.0 0.0 0.0 slli a0, a0, 1 +# CHECK-NEXT: 16. 1 14.0 0.0 0.0 sllw a0, a0, a0 +# CHECK-NEXT: 17. 1 15.0 0.0 0.0 slliw a0, a0, 1 +# CHECK-NEXT: 18. 1 15.0 0.0 0.0 srl a0, a0, a0 +# CHECK-NEXT: 19. 1 16.0 0.0 0.0 srli a0, a0, 1 +# CHECK-NEXT: 20. 1 16.0 0.0 0.0 srlw a0, a0, a0 +# CHECK-NEXT: 21. 1 16.0 0.0 0.0 srliw a0, a0, 1 +# CHECK-NEXT: 22. 1 16.0 0.0 0.0 sra a0, a0, a0 +# CHECK-NEXT: 23. 1 16.0 0.0 0.0 srai a0, a0, 1 +# CHECK-NEXT: 24. 1 16.0 0.0 0.0 sraw a0, a0, a0 +# CHECK-NEXT: 25. 1 16.0 0.0 0.0 sraiw a0, a0, 1 +# CHECK-NEXT: 26. 1 16.0 0.0 0.0 slt a0, a0, a0 +# CHECK-NEXT: 27. 1 16.0 0.0 0.0 slti a0, a0, 1 +# CHECK-NEXT: 28. 1 16.0 0.0 0.0 sltu a0, a0, a0 +# CHECK-NEXT: 29. 1 16.0 0.0 0.0 seqz a0, a0 +# CHECK-NEXT: 30. 1 17.0 0.0 0.0 mul a0, a0, a0 +# CHECK-NEXT: 31. 1 19.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 32. 1 20.0 0.0 0.0 mulw a0, a0, a0 +# CHECK-NEXT: 33. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 34. 1 23.0 0.0 0.0 beq a0, a0, .Ltmp0 +# CHECK-NEXT: 35. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 36. 1 23.0 0.0 0.0 bne a0, a0, .Ltmp1 +# CHECK-NEXT: 37. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 38. 1 23.0 0.0 0.0 blt a0, a0, .Ltmp2 +# CHECK-NEXT: 39. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 40. 1 23.0 0.0 0.0 bltu a0, a0, .Ltmp3 +# CHECK-NEXT: 41. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 42. 1 23.0 0.0 0.0 bge a0, a0, .Ltmp4 +# CHECK-NEXT: 43. 1 22.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 44. 1 23.0 0.0 0.0 bgeu a0, a0, .Ltmp5 +# CHECK-NEXT: 45. 1 22.0 0.0 0.0 add.uw a0, a0, a0 +# CHECK-NEXT: 46. 1 22.0 0.0 0.0 slli.uw a0, a0, 1 +# CHECK-NEXT: 47. 1 22.0 0.0 0.0 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 48. 1 22.0 0.0 0.0 sh2add.uw a0, a0, a0 +# CHECK-NEXT: 49. 1 22.0 0.0 0.0 sh3add.uw a0, a0, a0 +# CHECK-NEXT: 50. 1 22.0 0.0 0.0 sh1add a0, a0, a0 +# CHECK-NEXT: 51. 1 22.0 0.0 0.0 sh2add a0, a0, a0 +# CHECK-NEXT: 52. 1 22.0 0.0 0.0 sh3add a0, a0, a0 +# CHECK-NEXT: 53. 1 22.0 0.0 0.0 andn a0, a0, a0 +# CHECK-NEXT: 54. 1 19.0 0.0 0.0 orn a0, a0, a0 +# CHECK-NEXT: 55. 1 16.0 0.0 0.0 xnor a0, a0, a0 +# CHECK-NEXT: 56. 1 16.0 0.0 0.0 sext.b a0, a0 +# CHECK-NEXT: 57. 1 16.0 0.0 0.0 sext.h a0, a0 +# CHECK-NEXT: 58. 1 16.0 0.0 0.0 zext.h a0, a0 +# CHECK-NEXT: 59. 1 16.0 0.0 0.0 min a0, a0, a0 +# CHECK-NEXT: 60. 1 16.0 0.0 0.0 minu a0, a0, a0 +# CHECK-NEXT: 61. 1 16.0 0.0 0.0 max a0, a0, a0 +# CHECK-NEXT: 62. 1 16.0 0.0 0.0 maxu a0, a0, a0 +# CHECK-NEXT: 63. 1 16.0 0.0 0.0 rol a0, a0, a0 +# CHECK-NEXT: 64. 1 16.0 0.0 0.0 ror a0, a0, a0 +# CHECK-NEXT: 65. 1 16.0 0.0 0.0 rori a0, a0, 1 +# CHECK-NEXT: 66. 1 17.0 0.0 0.0 clz a0, a0 +# CHECK-NEXT: 67. 1 20.0 0.0 0.0 clzw a0, a0 +# CHECK-NEXT: 68. 1 23.0 0.0 0.0 ctz a0, a0 +# CHECK-NEXT: 69. 1 26.0 0.0 0.0 ctzw a0, a0 +# CHECK-NEXT: 70. 1 29.0 0.0 0.0 cpop a0, a0 +# CHECK-NEXT: 71. 1 31.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 72. 1 32.0 0.0 0.0 cpopw a0, a0 +# CHECK-NEXT: 73. 1 34.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 74. 1 34.0 0.0 0.0 rev8 a0, a0 +# CHECK-NEXT: 75. 1 34.0 0.0 0.0 orc.b a0, a0 +# CHECK-NEXT: 76. 1 35.0 0.0 0.0 lb a0, 0(a0) +# CHECK-NEXT: 77. 1 38.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 78. 1 39.0 0.0 0.0 lh a0, 0(a0) +# CHECK-NEXT: 79. 1 42.0 0.0 0.0 and a0, a0, a0 +# CHECK-NEXT: 80. 1 43.0 0.0 0.0 lw a0, 0(a0) +# CHECK-NEXT: 81. 1 46.0 0.0 0.0 or a0, a0, a0 +# CHECK-NEXT: 82. 1 47.0 0.0 0.0 ld a0, 0(a0) +# CHECK-NEXT: 83. 1 50.0 0.0 0.0 xor a0, a0, a0 +# CHECK-NEXT: 84. 1 51.0 0.0 0.0 lbu a0, 0(a0) +# CHECK-NEXT: 85. 1 54.0 0.0 0.0 addi a0, a0, 1 +# CHECK-NEXT: 86. 1 55.0 0.0 0.0 lhu a0, 0(a0) +# CHECK-NEXT: 87. 1 58.0 0.0 0.0 sub a0, a0, a0 +# CHECK-NEXT: 88. 1 59.0 0.0 0.0 lwu a0, 0(a0) +# CHECK-NEXT: 89. 1 62.0 0.0 0.0 addw a0, a0, a0 +# CHECK-NEXT: 90. 1 63.0 0.0 0.0 jr a0 +# CHECK-NEXT: 1 22.7 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s new file mode 100644 index 0000000000000..e1925e7647e33 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s @@ -0,0 +1,73 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline -iterations=1 < %s | FileCheck %s + +# Test XiangShan load to ALU (4 cycles) +ld a1, 0(a0) +addi a2, a1, 1 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.25 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.50 * ld a1, 0(a0) +# CHECK-NEXT: 1 1 0.25 addi a2, a1, 1 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - XS2ALU +# CHECK-NEXT: [0.1] - XS2ALU +# CHECK-NEXT: [0.2] - XS2ALU +# CHECK-NEXT: [0.3] - XS2ALU +# CHECK-NEXT: [1.0] - XS2FMAC +# CHECK-NEXT: [1.1] - XS2FMAC +# CHECK-NEXT: [1.2] - XS2FMAC +# CHECK-NEXT: [1.3] - XS2FMAC +# CHECK-NEXT: [2.0] - XS2FMISC +# CHECK-NEXT: [2.1] - XS2FMISC +# CHECK-NEXT: [3.0] - XS2LD +# CHECK-NEXT: [3.1] - XS2LD +# CHECK-NEXT: [4.0] - XS2MDU +# CHECK-NEXT: [4.1] - XS2MDU +# CHECK-NEXT: [5] - XS2MISC +# CHECK-NEXT: [6.0] - XS2ST +# CHECK-NEXT: [6.1] - XS2ST + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] +# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - ld a1, 0(a0) +# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addi a2, a1, 1 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeeeER ld a1, 0(a0) +# CHECK-NEXT: [0,1] D====eER addi a2, a1, 1 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld a1, 0(a0) +# CHECK-NEXT: 1. 1 5.0 0.0 0.0 addi a2, a1, 1 +# CHECK-NEXT: 1 3.0 0.5 0.0