From 66afbfd0893967cabf3d9fa701ffbcc997a151e2 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 10 Apr 2025 11:12:52 -0300 Subject: [PATCH 01/20] [RISCV] Add scheduler for x60 --- llvm/lib/Target/RISCV/RISCV.td | 1 + llvm/lib/Target/RISCV/RISCVProcessors.td | 2 +- .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 332 ++++++++++++++ .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 50 +-- .../tools/llvm-mca/RISCV/SpacemitX60/atomic.s | 312 +++++++++++++ .../RISCV/SpacemitX60/floating-point.s | 334 ++++++++++++++ .../llvm-mca/RISCV/SpacemitX60/integer.s | 420 ++++++++++++++++++ 7 files changed, 1425 insertions(+), 26 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 2c2271e486a84..6a6cec88b74a4 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -57,6 +57,7 @@ include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" include "RISCVSchedTTAscalonD8.td" include "RISCVSchedXiangShanNanHu.td" +include "RISCVSchedSpacemitX60.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. 
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 9d48adeec5e86..6e44518cb43f2 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -559,7 +559,7 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", TuneShiftedZExtWFusion]>; def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60", - NoSchedModel, + SpacemitX60Model, !listconcat(RVA22S64Features, [FeatureStdExtV, FeatureStdExtSscofpmf, diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td new file mode 100644 index 0000000000000..d1148cc2f69dc --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -0,0 +1,332 @@ +//=- RISCVSchedSpacemitX60.td - Spacemit X60 Scheduling Defs -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Scheduler model for the SpacemiT-X60 processor based on documentation of the +// C908 and experiments on real hardware (bpi-f3). 
+// +//===----------------------------------------------------------------------===// + +def SpacemitX60Model : SchedMachineModel { + let IssueWidth = 2; // dual-issue + let MicroOpBufferSize = 0; // in-order + let LoadLatency = 5; // worst case: >= 3 + let MispredictPenalty = 9; // nine-stage + + let CompleteModel = 0; + + let UnsupportedFeatures = [HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, + HasStdExtZksed, HasStdExtZksh, HasStdExtZkr]; +} + +let SchedModel = SpacemitX60Model in { + +//===----------------------------------------------------------------------===// +// Define processor resources for Spacemit-X60 + +// Information gathered from the C908 user manual: +let BufferSize = 0 in { + // The LSU supports dual issue for scalar store/load instructions + def SMX60_LS : ProcResource<2>; + + // An IEU can decode and issue two instructions at the same time + def SMX60_IEU : ProcResource<2>; + + def SMX60_FP : ProcResource<1>; +} + +//===----------------------------------------------------------------------===// + +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer arithmetic and logic +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer multiplication +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +// Integer division/remainder +// Worst case latency is used. 
+def : WriteRes { let Latency = 12; } +def : WriteRes { let Latency = 20; } +def : WriteRes { let Latency = 12; } +def : WriteRes { let Latency = 20; } + +// Bitmanip +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +def : WriteRes; + +def : WriteRes; + +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +// Single-bit instructions +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Memory/Atomic memory +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Atomics +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Floating point units Half precision +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } + +// Worst case latency is used +let Latency = 7, ReleaseAtCycles = [7] in { + def : WriteRes; + def : WriteRes; +} + +// Single precision +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } + +// Worst case latency is used +let Latency = 10, ReleaseAtCycles = [10] in { + def : WriteRes; + def : WriteRes; +} + +// Double precision +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } + +let Latency = 10, ReleaseAtCycles = [10] in { + def : WriteRes; + def : WriteRes; +} + +// Conversions +let 
Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 2 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 2 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Others +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def 
: ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Bitmanip +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// Single-bit instructions +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbc; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedSFB; +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 75f4b977a98b0..b384a0187a1ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -302,32 +302,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: .cfi_offset s4, -40 ; RV64X60-NEXT: li t0, 0 ; RV64X60-NEXT: li t1, 0 -; RV64X60-NEXT: addi t2, a7, -1 -; RV64X60-NEXT: add t4, a0, a6 -; RV64X60-NEXT: add t5, a2, a6 -; RV64X60-NEXT: add t3, a4, a6 -; RV64X60-NEXT: zext.w s0, t2 -; RV64X60-NEXT: mul s1, a1, s0 -; RV64X60-NEXT: add t4, t4, s1 -; RV64X60-NEXT: mul s1, a3, s0 -; RV64X60-NEXT: add t5, t5, s1 +; RV64X60-NEXT: addi s1, a7, -1 +; RV64X60-NEXT: 
zext.w s1, s1 +; RV64X60-NEXT: mul t2, a1, s1 +; RV64X60-NEXT: mul t3, a3, s1 +; RV64X60-NEXT: mul t4, a5, s1 +; RV64X60-NEXT: add s1, a0, a6 +; RV64X60-NEXT: add s0, a2, a6 +; RV64X60-NEXT: add t5, a4, a6 +; RV64X60-NEXT: add s2, s1, t2 ; RV64X60-NEXT: csrr t2, vlenb -; RV64X60-NEXT: mul s1, a5, s0 -; RV64X60-NEXT: add t3, t3, s1 -; RV64X60-NEXT: sltu s1, a0, t5 -; RV64X60-NEXT: sltu s0, a2, t4 -; RV64X60-NEXT: and t6, s1, s0 +; RV64X60-NEXT: add t3, t3, s0 +; RV64X60-NEXT: or t6, a1, a3 +; RV64X60-NEXT: add t4, t4, t5 +; RV64X60-NEXT: sltu s0, a0, t3 +; RV64X60-NEXT: sltu s1, a2, s2 +; RV64X60-NEXT: and t5, s0, s1 +; RV64X60-NEXT: slli t3, t2, 1 +; RV64X60-NEXT: slti s1, t6, 0 +; RV64X60-NEXT: sltu s0, a0, t4 +; RV64X60-NEXT: or t4, t5, s1 +; RV64X60-NEXT: sltu s1, a4, s2 +; RV64X60-NEXT: and s0, s0, s1 +; RV64X60-NEXT: or s1, a1, a5 ; RV64X60-NEXT: li t5, 32 -; RV64X60-NEXT: sltu s1, a0, t3 -; RV64X60-NEXT: sltu s0, a4, t4 -; RV64X60-NEXT: and t3, s1, s0 -; RV64X60-NEXT: or s1, a1, a3 ; RV64X60-NEXT: slti s1, s1, 0 -; RV64X60-NEXT: or t4, t6, s1 -; RV64X60-NEXT: or s0, a1, a5 -; RV64X60-NEXT: slti s0, s0, 0 -; RV64X60-NEXT: or s0, t3, s0 -; RV64X60-NEXT: slli t3, t2, 1 +; RV64X60-NEXT: or s0, s0, s1 ; RV64X60-NEXT: maxu s1, t3, t5 ; RV64X60-NEXT: or s0, t4, s0 ; RV64X60-NEXT: sltu s1, a6, s1 @@ -339,8 +339,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1 ; RV64X60-NEXT: add t5, t5, a1 ; RV64X60-NEXT: add a2, a2, a3 -; RV64X60-NEXT: add a4, a4, a5 ; RV64X60-NEXT: addiw t1, t1, 1 +; RV64X60-NEXT: add a4, a4, a5 ; RV64X60-NEXT: addi t0, t0, 1 ; RV64X60-NEXT: beq t1, a7, .LBB0_11 ; RV64X60-NEXT: .LBB0_4: # %for.cond1.preheader.us @@ -367,10 +367,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: vl2r.v v8, (s2) ; RV64X60-NEXT: vl2r.v v10, (s3) ; RV64X60-NEXT: sub s1, s1, t3 -; RV64X60-NEXT: add s3, s3, t3 ; RV64X60-NEXT: vaaddu.vv 
v8, v8, v10 ; RV64X60-NEXT: vs2r.v v8, (s4) ; RV64X60-NEXT: add s4, s4, t3 +; RV64X60-NEXT: add s3, s3, t3 ; RV64X60-NEXT: add s2, s2, t3 ; RV64X60-NEXT: bnez s1, .LBB0_7 ; RV64X60-NEXT: # %bb.8: # %middle.block diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s new file mode 100644 index 0000000000000..73109a78cd4b9 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s @@ -0,0 +1,312 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s + +# Zalrsc +lr.w t0, (t1) +lr.w.aq t1, (t2) +lr.w.rl t2, (t3) +lr.w.aqrl t3, (t4) +sc.w t6, t5, (t4) +sc.w.aq t5, t4, (t3) +sc.w.rl t4, t3, (t2) +sc.w.aqrl t3, t2, (t1) + +lr.d t0, (t1) +lr.d.aq t1, (t2) +lr.d.rl t2, (t3) +lr.d.aqrl t3, (t4) +sc.d t6, t5, (t4) +sc.d.aq t5, t4, (t3) +sc.d.rl t4, t3, (t2) +sc.d.aqrl t3, t2, (t1) + +# Zaamo +amoswap.w a4, ra, (s0) +amoadd.w a1, a2, (a3) +amoxor.w a2, a3, (a4) +amoand.w a3, a4, (a5) +amoor.w a4, a5, (a6) +amomin.w a5, a6, (a7) +amomax.w s7, s6, (s5) +amominu.w s6, s5, (s4) +amomaxu.w s5, s4, (s3) + +amoswap.w.aq a4, ra, (s0) +amoadd.w.aq a1, a2, (a3) +amoxor.w.aq a2, a3, (a4) +amoand.w.aq a3, a4, (a5) +amoor.w.aq a4, a5, (a6) +amomin.w.aq a5, a6, (a7) +amomax.w.aq s7, s6, (s5) +amominu.w.aq s6, s5, (s4) +amomaxu.w.aq s5, s4, (s3) + +amoswap.w.rl a4, ra, (s0) +amoadd.w.rl a1, a2, (a3) +amoxor.w.rl a2, a3, (a4) +amoand.w.rl a3, a4, (a5) +amoor.w.rl a4, a5, (a6) +amomin.w.rl a5, a6, (a7) +amomax.w.rl s7, s6, (s5) +amominu.w.rl s6, s5, (s4) +amomaxu.w.rl s5, s4, (s3) + +amoswap.w.aqrl a4, ra, (s0) +amoadd.w.aqrl a1, a2, (a3) +amoxor.w.aqrl a2, a3, (a4) +amoand.w.aqrl a3, a4, (a5) +amoor.w.aqrl a4, a5, (a6) +amomin.w.aqrl a5, a6, (a7) +amomax.w.aqrl s7, s6, (s5) +amominu.w.aqrl s6, s5, (s4) +amomaxu.w.aqrl s5, s4, (s3) + +amoswap.d a4, ra, (s0) +amoadd.d a1, a2, (a3) 
+amoxor.d a2, a3, (a4) +amoand.d a3, a4, (a5) +amoor.d a4, a5, (a6) +amomin.d a5, a6, (a7) +amomax.d s7, s6, (s5) +amominu.d s6, s5, (s4) +amomaxu.d s5, s4, (s3) + +amoswap.d.aq a4, ra, (s0) +amoadd.d.aq a1, a2, (a3) +amoxor.d.aq a2, a3, (a4) +amoand.d.aq a3, a4, (a5) +amoor.d.aq a4, a5, (a6) +amomin.d.aq a5, a6, (a7) +amomax.d.aq s7, s6, (s5) +amominu.d.aq s6, s5, (s4) +amomaxu.d.aq s5, s4, (s3) + +amoswap.d.rl a4, ra, (s0) +amoadd.d.rl a1, a2, (a3) +amoxor.d.rl a2, a3, (a4) +amoand.d.rl a3, a4, (a5) +amoor.d.rl a4, a5, (a6) +amomin.d.rl a5, a6, (a7) +amomax.d.rl s7, s6, (s5) +amominu.d.rl s6, s5, (s4) +amomaxu.d.rl s5, s4, (s3) + +amoswap.d.aqrl a4, ra, (s0) +amoadd.d.aqrl a1, a2, (a3) +amoxor.d.aqrl a2, a3, (a4) +amoand.d.aqrl a3, a4, (a5) +amoor.d.aqrl a4, a5, (a6) +amomin.d.aqrl a5, a6, (a7) +amomax.d.aqrl s7, s6, (s5) +amominu.d.aqrl s6, s5, (s4) +amomaxu.d.aqrl s5, s4, (s3) + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 88 +# CHECK-NEXT: Total Cycles: 86 +# CHECK-NEXT: Total uOps: 88 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.02 +# CHECK-NEXT: IPC: 1.02 +# CHECK-NEXT: Block RThroughput: 44.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.50 * lr.w t0, (t1) +# CHECK-NEXT: 1 5 0.50 * lr.w.aq t1, (t2) +# CHECK-NEXT: 1 5 0.50 * lr.w.rl t2, (t3) +# CHECK-NEXT: 1 5 0.50 * lr.w.aqrl t3, (t4) +# CHECK-NEXT: 1 3 0.50 * sc.w t6, t5, (t4) +# CHECK-NEXT: 1 3 0.50 * sc.w.aq t5, t4, (t3) +# CHECK-NEXT: 1 3 0.50 * sc.w.rl t4, t3, (t2) +# CHECK-NEXT: 1 3 0.50 * sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: 1 5 0.50 * lr.d t0, (t1) +# CHECK-NEXT: 1 5 0.50 * lr.d.aq t1, (t2) +# CHECK-NEXT: 1 5 0.50 * lr.d.rl t2, (t3) +# CHECK-NEXT: 1 5 0.50 * lr.d.aqrl t3, (t4) +# CHECK-NEXT: 1 3 0.50 * sc.d t6, t5, (t4) +# CHECK-NEXT: 1 3 
0.50 * sc.d.aq t5, t4, (t3) +# CHECK-NEXT: 1 3 0.50 * sc.d.rl t4, t3, (t2) +# CHECK-NEXT: 1 3 0.50 * sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 
* * amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aqrl s5, s4, (s3) + +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP +# 
CHECK-NEXT: [1.0] - SMX60_IEU +# CHECK-NEXT: [1.1] - SMX60_IEU +# CHECK-NEXT: [2.0] - SMX60_LS +# CHECK-NEXT: [2.1] - SMX60_LS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] +# CHECK-NEXT: - - - 44.00 44.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions: +# CHECK-NEXT: - - - - 1.00 lr.w t0, (t1) +# CHECK-NEXT: - - - 1.00 - lr.w.aq t1, (t2) +# CHECK-NEXT: - - - - 1.00 lr.w.rl t2, (t3) +# CHECK-NEXT: - - - 1.00 - lr.w.aqrl t3, (t4) +# CHECK-NEXT: - - - - 1.00 sc.w t6, t5, (t4) +# CHECK-NEXT: - - - 1.00 - sc.w.aq t5, t4, (t3) +# CHECK-NEXT: - - - - 1.00 sc.w.rl t4, t3, (t2) +# CHECK-NEXT: - - - 1.00 - sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - 1.00 lr.d t0, (t1) +# CHECK-NEXT: - - - 1.00 - lr.d.aq t1, (t2) +# CHECK-NEXT: - - - - 1.00 lr.d.rl t2, (t3) +# CHECK-NEXT: - - - 1.00 - lr.d.aqrl t3, (t4) +# CHECK-NEXT: - - - - 1.00 sc.d t6, t5, (t4) +# CHECK-NEXT: - - - 1.00 - sc.d.aq t5, t4, (t3) +# CHECK-NEXT: - - - - 1.00 sc.d.rl t4, t3, (t2) +# CHECK-NEXT: - - - 1.00 - sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - 1.00 amoswap.w a4, ra, (s0) +# CHECK-NEXT: - - - 1.00 - amoadd.w a1, a2, (a3) +# CHECK-NEXT: - - - - 1.00 amoxor.w a2, a3, (a4) +# CHECK-NEXT: - - - 1.00 - amoand.w a3, a4, (a5) +# CHECK-NEXT: - - - - 1.00 amoor.w a4, a5, (a6) +# CHECK-NEXT: - - - 1.00 - amomin.w a5, a6, (a7) +# CHECK-NEXT: - - - - 1.00 amomax.w s7, s6, (s5) +# CHECK-NEXT: - - - 1.00 - amominu.w s6, s5, (s4) +# CHECK-NEXT: - - - - 1.00 amomaxu.w s5, s4, (s3) +# CHECK-NEXT: - - - 1.00 - amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: - - - - 1.00 amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: - - - 1.00 - amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: - - - - 1.00 amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: - - - 1.00 - amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: - - - - 1.00 amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: - - - 1.00 - amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: - - - - 1.00 amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: - - - 
1.00 - amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: - - - - 1.00 amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: - - - 1.00 - amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: - - - - 1.00 amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: - - - 1.00 - amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: - - - - 1.00 amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: - - - 1.00 - amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: - - - - 1.00 amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: - - - 1.00 - amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: - - - - 1.00 amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: - - - 1.00 - amoswap.w.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - 1.00 amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - 1.00 - amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - 1.00 amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - 1.00 - amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - 1.00 amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - 1.00 - amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - 1.00 amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - 1.00 - amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - 1.00 amoswap.d a4, ra, (s0) +# CHECK-NEXT: - - - 1.00 - amoadd.d a1, a2, (a3) +# CHECK-NEXT: - - - - 1.00 amoxor.d a2, a3, (a4) +# CHECK-NEXT: - - - 1.00 - amoand.d a3, a4, (a5) +# CHECK-NEXT: - - - - 1.00 amoor.d a4, a5, (a6) +# CHECK-NEXT: - - - 1.00 - amomin.d a5, a6, (a7) +# CHECK-NEXT: - - - - 1.00 amomax.d s7, s6, (s5) +# CHECK-NEXT: - - - 1.00 - amominu.d s6, s5, (s4) +# CHECK-NEXT: - - - - 1.00 amomaxu.d s5, s4, (s3) +# CHECK-NEXT: - - - 1.00 - amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: - - - - 1.00 amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: - - - 1.00 - amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: - - - - 1.00 amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: - - - 1.00 - amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: - - - - 1.00 amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: - - - 1.00 - amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: - - - - 1.00 amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: - - - 1.00 - amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: - - - - 1.00 amoswap.d.rl a4, ra, (s0) +# 
CHECK-NEXT: - - - 1.00 - amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: - - - - 1.00 amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: - - - 1.00 - amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: - - - - 1.00 amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: - - - 1.00 - amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: - - - - 1.00 amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: - - - 1.00 - amominu.d.rl s6, s5, (s4) +# CHECK-NEXT: - - - - 1.00 amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: - - - 1.00 - amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - 1.00 amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - 1.00 - amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - 1.00 amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - 1.00 - amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - 1.00 amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - 1.00 - amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - 1.00 amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - 1.00 - amomaxu.d.aqrl s5, s4, (s3) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s new file mode 100644 index 0000000000000..1cd6f2a91f2b7 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s @@ -0,0 +1,334 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s + +# Floating-Point Load and Store Instructions +## Half-Precision +flh ft0, 0(a0) +fsh ft0, 0(a0) + +## Single-Precision +flw ft0, 0(a0) +fsw ft0, 0(a0) + +## Double-Precision +fld ft0, 0(a0) +fsd ft0, 0(a0) + +# Floating-Point Computational Instructions +## Half-Precision +fadd.h f26, f27, f28 +fsub.h f29, f30, f31 +fmul.h ft0, ft1, ft2 +fdiv.h ft3, ft4, ft5 +fsqrt.h ft6, ft7 +fmin.h fa5, fa6, fa7 +fmax.h fs2, fs3, fs4 +fmadd.h f10, f11, f12, f31 +fmsub.h f14, f15, f16, f17 +fnmsub.h f18, f19, f20, f21 +fnmadd.h f22, f23, f24, f25 + +## Single-Precision +fadd.s f26, f27, f28 +fsub.s f29, f30, f31 
+fmul.s ft0, ft1, ft2 +fdiv.s ft3, ft4, ft5 +fsqrt.s ft6, ft7 +fmin.s fa5, fa6, fa7 +fmax.s fs2, fs3, fs4 +fmadd.s f10, f11, f12, f31 +fmsub.s f14, f15, f16, f17 +fnmsub.s f18, f19, f20, f21 +fnmadd.s f22, f23, f24, f25 + +## Double-Precision +fadd.d f26, f27, f28 +fsub.d f29, f30, f31 +fmul.d ft0, ft1, ft2 +fdiv.d ft3, ft4, ft5 +fsqrt.d ft6, ft7 +fmin.d fa5, fa6, fa7 +fmax.d fs2, fs3, fs4 +fmadd.d f10, f11, f12, f31 +fmsub.d f14, f15, f16, f17 +fnmsub.d f18, f19, f20, f21 +fnmadd.d f22, f23, f24, f25 + +# Floating-Point Conversion and Move Instructions +## Half-Precision +fmv.x.h a2, fs7 +fmv.h.x ft1, a6 + +fcvt.s.h fa0, ft0 +fcvt.s.h fa0, ft0, rup + +fcvt.h.s ft2, fa2 +fcvt.d.h fa0, ft0 + +fcvt.d.h fa0, ft0, rup +fcvt.h.d ft2, fa2 + +## Single-Precision +fcvt.w.s a0, fs5 +fcvt.wu.s a1, fs6 +fcvt.s.w ft11, a4 +fcvt.s.wu ft0, a5 + +fcvt.l.s a0, ft0 +fcvt.lu.s a1, ft1 +fcvt.s.l ft2, a2 +fcvt.s.lu ft3, a3 + +fmv.x.w a2, fs7 +fmv.w.x ft1, a6 + +fsgnj.s fs1, fa0, fa1 +fsgnjn.s fa1, fa3, fa4 + +## Double-Precision +fcvt.wu.d a4, ft11 +fcvt.w.d a4, ft11 +fcvt.d.w ft0, a5 +fcvt.d.wu ft1, a6 + +fcvt.s.d fs5, fs6 +fcvt.d.s fs7, fs8 + +fcvt.l.d a0, ft0 +fcvt.lu.d a1, ft1 +fcvt.d.l ft3, a3 +fcvt.d.lu ft4, a4 + +fmv.x.d a2, ft2 +fmv.d.x ft5, a5 + +fsgnj.d fs1, fa0, fa1 +fsgnjn.d fa1, fa3, fa4 + +# Floating-Point Compare Instructions +## Half-Precision +feq.h a1, fs8, fs9 +flt.h a2, fs10, fs11 +fle.h a3, ft8, ft9 + +## Single-Precision +feq.s a1, fs8, fs9 +flt.s a2, fs10, fs11 +fle.s a3, ft8, ft9 + +## Double-Precision +feq.d a1, fs8, fs9 +flt.d a2, fs10, fs11 +fle.d a3, ft8, ft9 + +# Floating-Point Classify Instruction +## Half-Precision (NOTE(review): uses fclass.s below; fclass.h probably intended - confirm and regenerate the assertions) +fclass.s a3, ft10 +## Single-Precision +fclass.s a3, ft10 +## Double-Precision +fclass.d a3, ft10 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 85 +# CHECK-NEXT: Total Cycles: 138 +# CHECK-NEXT: Total uOps: 85 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.62 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 
105.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.50 * flh ft0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * fsh ft0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * flw ft0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * fsw ft0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * fld ft0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * fsd ft0, 0(a0) +# CHECK-NEXT: 1 3 1.00 fadd.h fs10, fs11, ft8 +# CHECK-NEXT: 1 3 1.00 fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1 3 1.00 fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 1 7 7.00 fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 1 7 7.00 fsqrt.h ft6, ft7 +# CHECK-NEXT: 1 3 1.00 fmin.h fa5, fa6, fa7 +# CHECK-NEXT: 1 3 1.00 fmax.h fs2, fs3, fs4 +# CHECK-NEXT: 1 4 1.00 fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 4 1.00 fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 4 1.00 fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 4 1.00 fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 3 1.00 fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1 3 1.00 fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1 4 1.00 fmul.s ft0, ft1, ft2 +# CHECK-NEXT: 1 10 10.00 fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 1 10 10.00 fsqrt.s ft6, ft7 +# CHECK-NEXT: 1 3 1.00 fmin.s fa5, fa6, fa7 +# CHECK-NEXT: 1 3 1.00 fmax.s fs2, fs3, fs4 +# CHECK-NEXT: 1 5 1.00 fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 5 1.00 fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 5 1.00 fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 5 1.00 fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 4 1.00 fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1 4 1.00 fsub.d ft9, ft10, ft11 +# CHECK-NEXT: 1 4 1.00 fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 1 10 10.00 fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 1 10 10.00 fsqrt.d ft6, ft7 +# CHECK-NEXT: 1 3 1.00 fmin.d fa5, fa6, fa7 +# CHECK-NEXT: 1 3 1.00 fmax.d fs2, fs3, fs4 +# CHECK-NEXT: 1 5 1.00 fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 5 1.00 fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 
1 5 1.00 fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 5 1.00 fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 2 0.50 fmv.x.h a2, fs7 +# CHECK-NEXT: 1 2 0.50 fmv.h.x ft1, a6 +# CHECK-NEXT: 1 3 1.00 fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1 3 1.00 fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1 3 1.00 fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1 3 1.00 fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1 3 1.00 fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1 3 1.00 fcvt.h.d ft2, fa2 +# CHECK-NEXT: 1 3 0.50 fcvt.w.s a0, fs5 +# CHECK-NEXT: 1 3 0.50 fcvt.wu.s a1, fs6 +# CHECK-NEXT: 1 3 0.50 fcvt.s.w ft11, a4 +# CHECK-NEXT: 1 3 0.50 fcvt.s.wu ft0, a5 +# CHECK-NEXT: 1 3 0.50 fcvt.l.s a0, ft0 +# CHECK-NEXT: 1 3 0.50 fcvt.lu.s a1, ft1 +# CHECK-NEXT: 1 3 0.50 fcvt.s.l ft2, a2 +# CHECK-NEXT: 1 3 0.50 fcvt.s.lu ft3, a3 +# CHECK-NEXT: 1 2 0.50 fmv.x.w a2, fs7 +# CHECK-NEXT: 1 2 0.50 fmv.w.x ft1, a6 +# CHECK-NEXT: 1 3 1.00 fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: 1 3 1.00 fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: 1 3 0.50 fcvt.wu.d a4, ft11 +# CHECK-NEXT: 1 3 0.50 fcvt.w.d a4, ft11 +# CHECK-NEXT: 1 3 0.50 fcvt.d.w ft0, a5 +# CHECK-NEXT: 1 3 0.50 fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1 3 1.00 fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1 3 1.00 fcvt.d.s fs7, fs8 +# CHECK-NEXT: 1 3 0.50 fcvt.l.d a0, ft0 +# CHECK-NEXT: 1 3 0.50 fcvt.lu.d a1, ft1 +# CHECK-NEXT: 1 3 0.50 fcvt.d.l ft3, a3 +# CHECK-NEXT: 1 3 0.50 fcvt.d.lu ft4, a4 +# CHECK-NEXT: 1 2 0.50 fmv.x.d a2, ft2 +# CHECK-NEXT: 1 2 0.50 fmv.d.x ft5, a5 +# CHECK-NEXT: 1 3 1.00 fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: 1 3 1.00 fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: 1 4 1.00 feq.h a1, fs8, fs9 +# CHECK-NEXT: 1 4 1.00 flt.h a2, fs10, fs11 +# CHECK-NEXT: 1 4 1.00 fle.h a3, ft8, ft9 +# CHECK-NEXT: 1 4 1.00 feq.s a1, fs8, fs9 +# CHECK-NEXT: 1 4 1.00 flt.s a2, fs10, fs11 +# CHECK-NEXT: 1 4 1.00 fle.s a3, ft8, ft9 +# CHECK-NEXT: 1 4 1.00 feq.d a1, fs8, fs9 +# CHECK-NEXT: 1 4 1.00 flt.d a2, fs10, fs11 +# CHECK-NEXT: 1 4 1.00 fle.d a3, ft8, ft9 +# CHECK-NEXT: 1 2 1.00 fclass.s a3, ft10 +# CHECK-NEXT: 1 2 1.00 fclass.s a3, 
ft10 +# CHECK-NEXT: 1 2 1.00 fclass.d a3, ft10 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP +# CHECK-NEXT: [1.0] - SMX60_IEU +# CHECK-NEXT: [1.1] - SMX60_IEU +# CHECK-NEXT: [2.0] - SMX60_LS +# CHECK-NEXT: [2.1] - SMX60_LS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] +# CHECK-NEXT: 105.00 11.00 11.00 3.00 3.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions: +# CHECK-NEXT: - - - - 1.00 flh ft0, 0(a0) +# CHECK-NEXT: - - - 1.00 - fsh ft0, 0(a0) +# CHECK-NEXT: - - - - 1.00 flw ft0, 0(a0) +# CHECK-NEXT: - - - 1.00 - fsw ft0, 0(a0) +# CHECK-NEXT: - - - - 1.00 fld ft0, 0(a0) +# CHECK-NEXT: - - - 1.00 - fsd ft0, 0(a0) +# CHECK-NEXT: 1.00 - - - - fadd.h fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 7.00 - - - - fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 7.00 - - - - fsqrt.h ft6, ft7 +# CHECK-NEXT: 1.00 - - - - fmin.h fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fmax.h fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - fmul.s ft0, ft1, ft2 +# CHECK-NEXT: 10.00 - - - - fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 10.00 - - - - fsqrt.s ft6, ft7 +# CHECK-NEXT: 1.00 - - - - fmin.s fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fmax.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - fsub.d ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - fmul.d ft0, 
ft1, ft2 +# CHECK-NEXT: 10.00 - - - - fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 10.00 - - - - fsqrt.d ft6, ft7 +# CHECK-NEXT: 1.00 - - - - fmin.d fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fmax.d fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - fmv.x.h a2, fs7 +# CHECK-NEXT: - 1.00 - - - fmv.h.x ft1, a6 +# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1.00 - - - - fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1.00 - - - - fcvt.h.d ft2, fa2 +# CHECK-NEXT: - - 1.00 - - fcvt.w.s a0, fs5 +# CHECK-NEXT: - 1.00 - - - fcvt.wu.s a1, fs6 +# CHECK-NEXT: - - 1.00 - - fcvt.s.w ft11, a4 +# CHECK-NEXT: - 1.00 - - - fcvt.s.wu ft0, a5 +# CHECK-NEXT: - - 1.00 - - fcvt.l.s a0, ft0 +# CHECK-NEXT: - 1.00 - - - fcvt.lu.s a1, ft1 +# CHECK-NEXT: - - 1.00 - - fcvt.s.l ft2, a2 +# CHECK-NEXT: - 1.00 - - - fcvt.s.lu ft3, a3 +# CHECK-NEXT: - - 1.00 - - fmv.x.w a2, fs7 +# CHECK-NEXT: - 1.00 - - - fmv.w.x ft1, a6 +# CHECK-NEXT: 1.00 - - - - fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - - 1.00 - - fcvt.wu.d a4, ft11 +# CHECK-NEXT: - 1.00 - - - fcvt.w.d a4, ft11 +# CHECK-NEXT: - - 1.00 - - fcvt.d.w ft0, a5 +# CHECK-NEXT: - 1.00 - - - fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1.00 - - - - fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1.00 - - - - fcvt.d.s fs7, fs8 +# CHECK-NEXT: - - 1.00 - - fcvt.l.d a0, ft0 +# CHECK-NEXT: - 1.00 - - - fcvt.lu.d a1, ft1 +# CHECK-NEXT: - - 1.00 - - fcvt.d.l ft3, a3 +# CHECK-NEXT: - 1.00 - - - fcvt.d.lu ft4, a4 +# CHECK-NEXT: - - 1.00 - - fmv.x.d a2, ft2 +# CHECK-NEXT: - 1.00 - - - fmv.d.x ft5, a5 +# CHECK-NEXT: 1.00 - - - - fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - fsgnjn.d fa1, fa3, 
fa4 +# CHECK-NEXT: 1.00 - - - - feq.h a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - flt.h a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - fle.h a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - feq.s a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - flt.s a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - fle.s a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - feq.d a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - flt.d a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - fle.d a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10 +# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10 +# CHECK-NEXT: 1.00 - - - - fclass.d a3, ft10 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s new file mode 100644 index 0000000000000..1c9d57a5b553f --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -0,0 +1,420 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s + +# Integer Register-Immediate Instructions +addi a0, a0, 1 +addiw a0, a0, 1 +slti a0, a0, 1 +sltiu a0, a0, 1 + +andi a0, a0, 1 +ori a0, a0, 1 +xori a0, a0, 1 + +slli a0, a0, 1 +srli a0, a0, 1 +srai a0, a0, 1 +slliw a0, a0, 1 +srliw a0, a0, 1 +sraiw a0, a0, 1 + +lui a0, 1 +auipc a1, 1 + +# Integer Register-Register Operations +add a0, a0, a1 +addw a0, a0, a0 +slt a0, a0, a0 +sltu a0, a0, a0 + +and a0, a0, a0 +or a0, a0, a0 +xor a0, a0, a0 + +sll a0, a0, a0 +srl a0, a0, a0 +sra a0, a0, a0 +sllw a0, a0, a0 +srlw a0, a0, a0 +sraw a0, a0, a0 + +sub a0, a0, a0 +subw a0, a0, a0 + +# Control Transfer Instructions + +## Unconditional Jumps +jal a0, 1f +1: +jalr a0 +beq a0, a0, 1f +1: +bne a0, a0, 1f +1: +blt a0, a0, 1f +1: +bltu a0, a0, 1f +1: +bge a0, a0, 1f +1: +bgeu a0, a0, 1f +1: +add a0, a0, a0 + +# Load and Store Instructions +lb t0, 0(a0) +lbu t0, 0(a0) +lh t0, 0(a0) +lhu t0, 0(a0) +lw t0, 0(a0) +lwu t0, 0(a0) +ld t0, 0(a0) + +sb t0, 0(a0) +sh t0, 0(a0) +sw t0, 
0(a0) +sd t0, 0(a0) + +# Multiply/Division +mul a0, a0, a0 +mulh a0, a0, a0 +mulhu a0, a0, a0 +mulhsu a0, a0, a0 +mulw a0, a0, a0 +div a0, a1, a2 +divu a0, a1, a2 +rem a0, a1, a2 +remu a0, a1, a2 +divw a0, a1, a2 +divuw a0, a1, a2 +remw a0, a1, a2 +remuw a0, a1, a2 + +# Zicsr +csrrw t0, 0xfff, t1 +csrrs s3, 0x001, s5 +csrrc sp, 0x000, ra +csrrwi a5, 0x000, 0 +csrrsi t2, 0xfff, 31 +csrrci t1, 0x140, 5 + +# Zicond +czero.eqz a0, a1, a2 +czero.nez a0, a1, a2 + +# Zicond +czero.eqz a0, a1, a2 +czero.nez a0, a1, a2 + +# Zba +add.uw a0, a0, a0 +slli.uw a0, a0, 1 +sh1add.uw a0, a0, a0 +sh2add.uw a0, a0, a0 +sh3add.uw a0, a0, a0 +sh1add a0, a0, a0 +sh2add a0, a0, a0 +sh3add a0, a0, a0 + +# Zbb +andn a0, a0, a0 +orn a0, a0, a0 +xnor a0, a0, a0 + +clz a0, a0 +clzw a0, a0 +ctz a0, a0 +ctzw a0, a0 + +cpop a0, a0 +cpopw a0, a0 + +min a0, a0, a0 +minu a0, a0, a0 +max a0, a0, a0 +maxu a0, a0, a0 + +sext.b a0, a0 +sext.h a0, a0 +zext.h a0, a0 + +rol a0, a0, a0 +rolw a0, a0, a0 +ror a0, a0, a0 +rorw a0, a0, a0 +rori a0, a0, 1 +roriw a0, a0, 1 + +orc.b a0, a0 + +rev8 a0, a0 + +# Zbs +bclr a0, a1, a2 +bclri a0, a1, 1 +bext a0, a1, a2 +bexti a0, a1, 1 +binv a0, a1, a2 +binvi a0, a1, 1 +bset a0, a1, a2 +bseti a0, a1, 1 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 113 +# CHECK-NEXT: Total Cycles: 320 +# CHECK-NEXT: Total uOps: 113 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.35 +# CHECK-NEXT: IPC: 0.35 +# CHECK-NEXT: Block RThroughput: 56.5 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 addi a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 addiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 slti a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 seqz a0, a0 +# CHECK-NEXT: 1 1 0.50 andi a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 ori a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 xori a0, 
a0, 1 +# CHECK-NEXT: 1 1 0.50 slli a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 srli a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 srai a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 slliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 srliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 sraiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 lui a0, 1 +# CHECK-NEXT: 1 1 0.50 auipc a1, 1 +# CHECK-NEXT: 1 1 0.50 add a0, a0, a1 +# CHECK-NEXT: 1 1 0.50 addw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 slt a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sltu a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 and a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 or a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 xor a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sll a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 srl a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sra a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sllw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 srlw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sraw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sub a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 subw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 jal a0, .Ltmp0 +# CHECK-NEXT: 1 1 0.50 jalr a0 +# CHECK-NEXT: 1 1 0.50 beq a0, a0, .Ltmp1 +# CHECK-NEXT: 1 1 0.50 bne a0, a0, .Ltmp2 +# CHECK-NEXT: 1 1 0.50 blt a0, a0, .Ltmp3 +# CHECK-NEXT: 1 1 0.50 bltu a0, a0, .Ltmp4 +# CHECK-NEXT: 1 1 0.50 bge a0, a0, .Ltmp5 +# CHECK-NEXT: 1 1 0.50 bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: 1 1 0.50 add a0, a0, a0 +# CHECK-NEXT: 1 5 0.50 * lb t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * lbu t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * lh t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * lhu t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * lw t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * lwu t0, 0(a0) +# CHECK-NEXT: 1 5 0.50 * ld t0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * sb t0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * sh t0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * sw t0, 0(a0) +# CHECK-NEXT: 1 3 0.50 * sd t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 mul a0, a0, a0 +# CHECK-NEXT: 1 4 0.50 mulh a0, a0, a0 +# CHECK-NEXT: 1 4 0.50 mulhu a0, a0, a0 +# CHECK-NEXT: 1 4 0.50 mulhsu a0, a0, a0 +# CHECK-NEXT: 1 4 0.50 mulw a0, a0, a0 +# CHECK-NEXT: 1 20 0.50 div a0, a1, a2 +# CHECK-NEXT: 1 20 0.50 divu a0, a1, a2 +# 
CHECK-NEXT: 1 20 0.50 rem a0, a1, a2 +# CHECK-NEXT: 1 20 0.50 remu a0, a1, a2 +# CHECK-NEXT: 1 12 0.50 divw a0, a1, a2 +# CHECK-NEXT: 1 12 0.50 divuw a0, a1, a2 +# CHECK-NEXT: 1 12 0.50 remw a0, a1, a2 +# CHECK-NEXT: 1 12 0.50 remuw a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 U csrrw t0, 4095, t1 +# CHECK-NEXT: 1 1 0.50 U csrrs s3, fflags, s5 +# CHECK-NEXT: 1 1 0.50 U csrrc sp, 0, ra +# CHECK-NEXT: 1 1 0.50 U csrrwi a5, 0, 0 +# CHECK-NEXT: 1 1 0.50 U csrrsi t2, 4095, 31 +# CHECK-NEXT: 1 1 0.50 U csrrci t1, sscratch, 5 +# CHECK-NEXT: 1 1 0.50 czero.eqz a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 czero.nez a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 czero.eqz a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 czero.nez a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 slli.uw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh2add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh3add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh2add a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh3add a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 andn a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 orn a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 xnor a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 clz a0, a0 +# CHECK-NEXT: 1 1 0.50 clzw a0, a0 +# CHECK-NEXT: 1 1 0.50 ctz a0, a0 +# CHECK-NEXT: 1 1 0.50 ctzw a0, a0 +# CHECK-NEXT: 1 1 0.50 cpop a0, a0 +# CHECK-NEXT: 1 1 0.50 cpopw a0, a0 +# CHECK-NEXT: 1 1 0.50 min a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 minu a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 max a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 maxu a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sext.b a0, a0 +# CHECK-NEXT: 1 1 0.50 sext.h a0, a0 +# CHECK-NEXT: 1 1 0.50 zext.h a0, a0 +# CHECK-NEXT: 1 1 0.50 rol a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 rolw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 ror a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 rorw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 rori a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 roriw a0, a0, 1 +# CHECK-NEXT: 1 1 0.50 orc.b a0, a0 +# CHECK-NEXT: 1 1 0.50 rev8 a0, a0 +# CHECK-NEXT: 1 1 0.50 bclr a0, a1, a2 +# 
CHECK-NEXT: 1 1 0.50 bclri a0, a1, 1 +# CHECK-NEXT: 1 1 0.50 bext a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 bexti a0, a1, 1 +# CHECK-NEXT: 1 1 0.50 binv a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 binvi a0, a1, 1 +# CHECK-NEXT: 1 1 0.50 bset a0, a1, a2 +# CHECK-NEXT: 1 1 0.50 bseti a0, a1, 1 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP +# CHECK-NEXT: [1.0] - SMX60_IEU +# CHECK-NEXT: [1.1] - SMX60_IEU +# CHECK-NEXT: [2.0] - SMX60_LS +# CHECK-NEXT: [2.1] - SMX60_LS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] +# CHECK-NEXT: - 51.00 51.00 5.00 6.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions: +# CHECK-NEXT: - - 1.00 - - addi a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - addiw a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - slti a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - seqz a0, a0 +# CHECK-NEXT: - - 1.00 - - andi a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - ori a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - xori a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - slli a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - srli a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - srai a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - slliw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - srliw a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - sraiw a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - lui a0, 1 +# CHECK-NEXT: - - 1.00 - - auipc a1, 1 +# CHECK-NEXT: - 1.00 - - - add a0, a0, a1 +# CHECK-NEXT: - - 1.00 - - addw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - slt a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sltu a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - and a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - or a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - xor a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sll a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - srl a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sra a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sllw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - srlw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sraw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sub a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - subw a0, a0, a0 +# CHECK-NEXT: - - 
1.00 - - jal a0, .Ltmp0 +# CHECK-NEXT: - 1.00 - - - jalr a0 +# CHECK-NEXT: - - 1.00 - - beq a0, a0, .Ltmp1 +# CHECK-NEXT: - 1.00 - - - bne a0, a0, .Ltmp2 +# CHECK-NEXT: - - 1.00 - - blt a0, a0, .Ltmp3 +# CHECK-NEXT: - 1.00 - - - bltu a0, a0, .Ltmp4 +# CHECK-NEXT: - - 1.00 - - bge a0, a0, .Ltmp5 +# CHECK-NEXT: - 1.00 - - - bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: - - 1.00 - - add a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 lb t0, 0(a0) +# CHECK-NEXT: - - - 1.00 - lbu t0, 0(a0) +# CHECK-NEXT: - - - - 1.00 lh t0, 0(a0) +# CHECK-NEXT: - - - 1.00 - lhu t0, 0(a0) +# CHECK-NEXT: - - - - 1.00 lw t0, 0(a0) +# CHECK-NEXT: - - - 1.00 - lwu t0, 0(a0) +# CHECK-NEXT: - - - - 1.00 ld t0, 0(a0) +# CHECK-NEXT: - - - 1.00 - sb t0, 0(a0) +# CHECK-NEXT: - - - - 1.00 sh t0, 0(a0) +# CHECK-NEXT: - - - 1.00 - sw t0, 0(a0) +# CHECK-NEXT: - - - - 1.00 sd t0, 0(a0) +# CHECK-NEXT: - 1.00 - - - mul a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - mulh a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - mulhu a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - mulhsu a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - mulw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - div a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - divu a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - rem a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - remu a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - divw a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - divuw a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - remw a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - remuw a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - csrrw t0, 4095, t1 +# CHECK-NEXT: - 1.00 - - - csrrs s3, fflags, s5 +# CHECK-NEXT: - - 1.00 - - csrrc sp, 0, ra +# CHECK-NEXT: - 1.00 - - - csrrwi a5, 0, 0 +# CHECK-NEXT: - - 1.00 - - csrrsi t2, 4095, 31 +# CHECK-NEXT: - 1.00 - - - csrrci t1, sscratch, 5 +# CHECK-NEXT: - - 1.00 - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - czero.nez a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - czero.nez a0, a1, a2 +# CHECK-NEXT: - - 1.00 - - add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - slli.uw a0, a0, 1 +# 
CHECK-NEXT: - - 1.00 - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sh2add.uw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sh3add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sh1add a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - sh2add a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sh3add a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - andn a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - orn a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - xnor a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - clz a0, a0 +# CHECK-NEXT: - - 1.00 - - clzw a0, a0 +# CHECK-NEXT: - 1.00 - - - ctz a0, a0 +# CHECK-NEXT: - - 1.00 - - ctzw a0, a0 +# CHECK-NEXT: - 1.00 - - - cpop a0, a0 +# CHECK-NEXT: - - 1.00 - - cpopw a0, a0 +# CHECK-NEXT: - 1.00 - - - min a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - minu a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - max a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - maxu a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - sext.b a0, a0 +# CHECK-NEXT: - - 1.00 - - sext.h a0, a0 +# CHECK-NEXT: - 1.00 - - - zext.h a0, a0 +# CHECK-NEXT: - - 1.00 - - rol a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - rolw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - ror a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - rorw a0, a0, a0 +# CHECK-NEXT: - - 1.00 - - rori a0, a0, 1 +# CHECK-NEXT: - 1.00 - - - roriw a0, a0, 1 +# CHECK-NEXT: - - 1.00 - - orc.b a0, a0 +# CHECK-NEXT: - 1.00 - - - rev8 a0, a0 +# CHECK-NEXT: - - 1.00 - - bclr a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - bclri a0, a1, 1 +# CHECK-NEXT: - - 1.00 - - bext a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - bexti a0, a1, 1 +# CHECK-NEXT: - - 1.00 - - binv a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - binvi a0, a1, 1 +# CHECK-NEXT: - - 1.00 - - bset a0, a1, a2 +# CHECK-NEXT: - 1.00 - - - bseti a0, a1, 1 From b48c476f6b908aa9f47597a31e662036d01c6e65 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:24:35 -0300 Subject: [PATCH 02/20] Add comment about fp proc Signed-off-by: Mikhail R. 
Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index d1148cc2f69dc..59f812d98a358 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -38,6 +38,9 @@ let BufferSize = 0 in { // An IEU can decode and issue two instructions at the same time def SMX60_IEU : ProcResource<2>; + // Although the X60 does appear to support multiple issue for at least some + // floating point instructions, this model assumes single issue as + // increasing it reduces the gains we saw in performance def SMX60_FP : ProcResource<1>; } From cb4bb7607ded529c44a5ca720b5350545de0e800 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:28:56 -0300 Subject: [PATCH 03/20] Split IEU procResource and set jump to only use one Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 59f812d98a358..f1c429c8b1e06 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -36,7 +36,9 @@ let BufferSize = 0 in { def SMX60_LS : ProcResource<2>; // An IEU can decode and issue two instructions at the same time - def SMX60_IEU : ProcResource<2>; + def SMX60_IEUA : ProcResource<1>; + def SMX60_IEUB : ProcResource<1>; + def SMX60_IEU : ProcResGroup<[SMX60_IEUA, SMX60_IEUB]>; // Although the X60 does appear to support multiple issue for at least some // floating point instructions, this model assumes single issue as @@ -47,9 +49,9 @@ let BufferSize = 0 in { //===----------------------------------------------------------------------===// // Branching -def : WriteRes; -def : WriteRes; -def : WriteRes; +def : 
WriteRes; +def : WriteRes; +def : WriteRes; // Integer arithmetic and logic def : WriteRes; From fdfb1c0e4e8bf2408aee14740c1adc966ddd597e Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:31:55 -0300 Subject: [PATCH 04/20] single-issue for div and add ReleaseAtCycles Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index f1c429c8b1e06..2268791f560e9 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -69,10 +69,15 @@ let Latency = 4 in { // Integer division/remainder // Worst case latency is used. -def : WriteRes { let Latency = 12; } -def : WriteRes { let Latency = 20; } -def : WriteRes { let Latency = 12; } -def : WriteRes { let Latency = 20; } +let Latency = 12, ReleaseAtCycles = [12] in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 20, ReleaseAtCycles = [20] in { + def : WriteRes; + def : WriteRes; +} // Bitmanip def : WriteRes; From 05b146826db704bb2b957500e62a4b9dfa93cf4c Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:34:48 -0300 Subject: [PATCH 05/20] Grouped together latencies with the same value Signed-off-by: Mikhail R. 
Gadelha --- .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 2268791f560e9..31d063f878c02 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -140,11 +140,13 @@ let Latency = 5 in { } // Floating point units Half precision -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } // Worst case latency is used let Latency = 7, ReleaseAtCycles = [7] in { @@ -153,11 +155,13 @@ let Latency = 7, ReleaseAtCycles = [7] in { } // Single precision -def : WriteRes { let Latency = 3; } +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } // Worst case latency is used let Latency = 10, ReleaseAtCycles = [10] in { @@ -166,11 +170,15 @@ let Latency = 10, ReleaseAtCycles = [10] in { } // Double precision -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} +let Latency = 3 in { + def : WriteRes; + def : WriteRes; +} def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 3; } let Latency = 10, ReleaseAtCycles = [10] in { def : WriteRes; From 0ffd576a5b97484231d8352e85bdb9a6138e8c48 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:36:38 -0300 Subject: [PATCH 06/20] Removed -mattr Signed-off-by: Mikhail R. 
Gadelha --- llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s | 2 +- llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s | 2 +- llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s index 73109a78cd4b9..2d8cc07d2f374 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s # Zalrsc lr.w t0, (t1) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s index 1cd6f2a91f2b7..bf95e64056008 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s # Floating-Point Load and Store Instructions ## Half-Precision diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index 1c9d57a5b553f..575dd5497ca4b 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca 
-mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s # Integer Register-Immediate Instructions addi a0, a0, 1 From 016e97484219b8120726b40221d12ebcd7f89c97 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 25 Apr 2025 18:45:17 -0300 Subject: [PATCH 07/20] Added -instruction-tables=full to the tests Signed-off-by: Mikhail R. Gadelha --- .../tools/llvm-mca/RISCV/SpacemitX60/atomic.s | 386 +++++++------- .../RISCV/SpacemitX60/floating-point.s | 260 +++++----- .../llvm-mca/RISCV/SpacemitX60/integer.s | 482 +++++++++--------- 3 files changed, 564 insertions(+), 564 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s index 2d8cc07d2f374..ceab015e27203 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 -instruction-tables=full < %s | FileCheck %s # Zalrsc lr.w t0, (t1) @@ -101,15 +101,12 @@ amomax.d.aqrl s7, s6, (s5) amominu.d.aqrl s6, s5, (s4) amomaxu.d.aqrl s5, s4, (s3) -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 88 -# CHECK-NEXT: Total Cycles: 86 -# CHECK-NEXT: Total uOps: 88 - -# CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.02 -# CHECK-NEXT: IPC: 1.02 -# CHECK-NEXT: Block RThroughput: 44.0 +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP:1 +# CHECK-NEXT: [1] - SMX60_IEU:2 SMX60_IEUA, SMX60_IEUB +# CHECK-NEXT: [2] - SMX60_IEUA:1 +# CHECK-NEXT: [3] - SMX60_IEUB:1 +# CHECK-NEXT: [4] - SMX60_LS:2 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -118,195 +115,198 @@ amomaxu.d.aqrl s5, s4, (s3) # CHECK-NEXT: [4]: MayLoad # CHECK-NEXT: [5]: MayStore # CHECK-NEXT: [6]: HasSideEffects (U) +# CHECK-NEXT: [7]: Bypass Latency +# 
CHECK-NEXT: [8]: Resources ( | [] | [, | [] | [, | [] | [, Date: Sat, 26 Apr 2025 10:44:15 -0300 Subject: [PATCH 08/20] Update latencies based on experiments Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 31d063f878c02..324a9ae912ec2 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -69,12 +69,12 @@ let Latency = 4 in { // Integer division/remainder // Worst case latency is used. -let Latency = 12, ReleaseAtCycles = [12] in { +let Latency = 15, ReleaseAtCycles = [15] in { def : WriteRes; def : WriteRes; } -let Latency = 20, ReleaseAtCycles = [20] in { +let Latency = 23, ReleaseAtCycles = [23] in { def : WriteRes; def : WriteRes; } From f0c18306601f4760f3b4e9e33ec3173f699084d1 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sat, 26 Apr 2025 12:20:48 -0300 Subject: [PATCH 09/20] Add latency for cpop Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 324a9ae912ec2..107bdf8cc609d 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -90,8 +90,10 @@ def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes; -def : WriteRes; +let Latency = 2 in { + def : WriteRes; + def : WriteRes; +} def : WriteRes; From 31ef91bcac05ffb55e0dc93772c630f69859f4f9 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sun, 27 Apr 2025 15:45:41 -0300 Subject: [PATCH 10/20] Updated latency based on experiments Signed-off-by: Mikhail R. 
Gadelha --- .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 79 +++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 107bdf8cc609d..5378a098cff3c 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -54,6 +54,7 @@ def : WriteRes; def : WriteRes; // Integer arithmetic and logic +// Latency of ALU instructions is 1, but add.uw is 2 def : WriteRes; def : WriteRes; def : WriteRes; @@ -62,14 +63,13 @@ def : WriteRes; def : WriteRes; // Integer multiplication -let Latency = 4 in { - def : WriteRes; - def : WriteRes; -} +// The latency of mul is 5, while mulh, mulhsu, mulhu is 6. +// Worst case latency is used +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 3; } // Integer division/remainder -// Worst case latency is used. -let Latency = 15, ReleaseAtCycles = [15] in { +let Latency = 3, ReleaseAtCycles = [3] in { def : WriteRes; def : WriteRes; } @@ -96,13 +96,14 @@ let Latency = 2 in { } def : WriteRes; - def : WriteRes; - def : WriteRes; -def : WriteRes; -def : WriteRes; +let Latency = 2 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} // Single-bit instructions def : WriteRes; @@ -142,92 +143,90 @@ let Latency = 5 in { } // Floating point units Half precision -let Latency = 3 in { +let Latency = 4 in { def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; } -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } -// Worst case latency is used -let Latency = 7, ReleaseAtCycles = [7] in { +let Latency = 12, ReleaseAtCycles = [12] in { def : WriteRes; def : WriteRes; } // Single precision -let Latency = 3 in { +let Latency = 4 in { def : WriteRes; + def : WriteRes; def : WriteRes; def : WriteRes; } -def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 5; } -// Worst case latency is used -let Latency = 10, 
ReleaseAtCycles = [10] in { +let Latency = 15, ReleaseAtCycles = [15] in { def : WriteRes; def : WriteRes; } // Double precision -let Latency = 4 in { +let Latency = 5 in { def : WriteRes; def : WriteRes; -} -let Latency = 3 in { def : WriteRes; - def : WriteRes; } -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } -let Latency = 10, ReleaseAtCycles = [10] in { +let Latency = 22, ReleaseAtCycles = [22] in { def : WriteRes; def : WriteRes; } // Conversions -let Latency = 3 in { +let Latency = 6 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; - def : WriteRes; - def : WriteRes; def : WriteRes; def : WriteRes; - def : WriteRes; - def : WriteRes; def : WriteRes; def : WriteRes; - def : WriteRes; - def : WriteRes; def : WriteRes; def : WriteRes; } -let Latency = 2 in { +let Latency = 6 in { def : WriteRes; def : WriteRes; def : WriteRes; -} -let Latency = 4 in { def : WriteRes; def : WriteRes; def : WriteRes; + + def : WriteRes; + def : WriteRes; } -let Latency = 2 in { +let Latency = 4 in { def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; def : WriteRes; + def : WriteRes; + def : WriteRes; } // Others @@ -334,6 +333,7 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; // Single-bit instructions def : ReadAdvance; def : ReadAdvance; @@ -343,7 +343,6 @@ def : ReadAdvance; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; defm : UnsupportedSchedZabha; -defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; From 753bce9ddb7fd19dbcc1f5ba0ab7402bab35b82e Mon Sep 17 00:00:00 2001 From: "Mikhail R. 
Gadelha" Date: Sun, 27 Apr 2025 15:45:51 -0300 Subject: [PATCH 11/20] Updated tests Signed-off-by: Mikhail R. Gadelha --- .../RISCV/SpacemitX60/floating-point.s | 162 +++++++++--------- .../llvm-mca/RISCV/SpacemitX60/integer.s | 60 +++---- 2 files changed, 111 insertions(+), 111 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s index d298774625bc1..bd3666ef7bb9f 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s @@ -154,85 +154,85 @@ fclass.d a3, ft10 # CHECK-NEXT: 1 3 0.50 * 3 SMX60_LS FSW fsw ft0, 0(a0) # CHECK-NEXT: 1 5 0.50 * 5 SMX60_LS FLD fld ft0, 0(a0) # CHECK-NEXT: 1 3 0.50 * 3 SMX60_LS FSD fsd ft0, 0(a0) -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FADD_H fadd.h fs10, fs11, ft8 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSUB_H fsub.h ft9, ft10, ft11 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMUL_H fmul.h ft0, ft1, ft2 -# CHECK-NEXT: 1 7 7.00 7 SMX60_FP[7] FDIV_H fdiv.h ft3, ft4, ft5 -# CHECK-NEXT: 1 7 7.00 7 SMX60_FP[7] FSQRT_H fsqrt.h ft6, ft7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMIN_H fmin.h fa5, fa6, fa7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMAX_H fmax.h fs2, fs3, fs4 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMADD_H fmadd.h fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMSUB_H fmsub.h fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FNMSUB_H fnmsub.h fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FNMADD_H fnmadd.h fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FADD_S fadd.s fs10, fs11, ft8 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSUB_S fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FADD_H fadd.h fs10, fs11, ft8 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FSUB_H fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMUL_H fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 1 12 12.00 12 SMX60_FP[12] FDIV_H fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 1 12 12.00 12 SMX60_FP[12] FSQRT_H fsqrt.h ft6, ft7 
+# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMIN_H fmin.h fa5, fa6, fa7 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMAX_H fmax.h fs2, fs3, fs4 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMADD_H fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMSUB_H fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMSUB_H fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMADD_H fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FADD_S fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FSUB_S fsub.s ft9, ft10, ft11 # CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMUL_S fmul.s ft0, ft1, ft2 -# CHECK-NEXT: 1 10 10.00 10 SMX60_FP[10] FDIV_S fdiv.s ft3, ft4, ft5 -# CHECK-NEXT: 1 10 10.00 10 SMX60_FP[10] FSQRT_S fsqrt.s ft6, ft7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMIN_S fmin.s fa5, fa6, fa7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMAX_S fmax.s fs2, fs3, fs4 +# CHECK-NEXT: 1 15 15.00 15 SMX60_FP[15] FDIV_S fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 1 15 15.00 15 SMX60_FP[15] FSQRT_S fsqrt.s ft6, ft7 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMIN_S fmin.s fa5, fa6, fa7 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMAX_S fmax.s fs2, fs3, fs4 # CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMADD_S fmadd.s fa0, fa1, fa2, ft11 # CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMSUB_S fmsub.s fa4, fa5, fa6, fa7 # CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMSUB_S fnmsub.s fs2, fs3, fs4, fs5 # CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMADD_S fnmadd.s fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FADD_D fadd.d fs10, fs11, ft8 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FSUB_D fsub.d ft9, ft10, ft11 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMUL_D fmul.d ft0, ft1, ft2 -# CHECK-NEXT: 1 10 10.00 10 SMX60_FP[10] FDIV_D fdiv.d ft3, ft4, ft5 -# CHECK-NEXT: 1 10 10.00 10 SMX60_FP[10] FSQRT_D fsqrt.d ft6, ft7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMIN_D fmin.d fa5, fa6, fa7 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FMAX_D fmax.d fs2, fs3, fs4 -# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMADD_D fmadd.d fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMSUB_D fmsub.d fa4, fa5, 
fa6, fa7 -# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMSUB_D fnmsub.d fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FNMADD_D fnmadd.d fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_X_H fmv.x.h a2, fs7 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_H_X fmv.h.x ft1, a6 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_S_H fcvt.s.h fa0, ft0 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_S_H fcvt.s.h fa0, ft0, rup -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_H_S fcvt.h.s ft2, fa2 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_D_H fcvt.d.h fa0, ft0 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_D_H fcvt.d.h fa0, ft0, rup -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_H_D fcvt.h.d ft2, fa2 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_W_S fcvt.w.s a0, fs5 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_WU_S fcvt.wu.s a1, fs6 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_S_W fcvt.s.w ft11, a4 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_S_WU fcvt.s.wu ft0, a5 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_L_S fcvt.l.s a0, ft0 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_LU_S fcvt.lu.s a1, ft1 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_S_L fcvt.s.l ft2, a2 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_S_LU fcvt.s.lu ft3, a3 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_X_W fmv.x.w a2, fs7 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_W_X fmv.w.x ft1, a6 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSGNJ_S fsgnj.s fs1, fa0, fa1 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSGNJN_S fsgnjn.s fa1, fa3, fa4 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_WU_D fcvt.wu.d a4, ft11 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_W_D fcvt.w.d a4, ft11 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_D_W fcvt.d.w ft0, a5 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_D_WU fcvt.d.wu ft1, a6 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_S_D fcvt.s.d fs5, fs6 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FCVT_D_S fcvt.d.s fs7, fs8 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_L_D fcvt.l.d a0, ft0 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_LU_D fcvt.lu.d a1, ft1 -# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU FCVT_D_L fcvt.d.l ft3, a3 -# CHECK-NEXT: 1 3 
0.50 3 SMX60_IEU FCVT_D_LU fcvt.d.lu ft4, a4 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_X_D fmv.x.d a2, ft2 -# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU FMV_D_X fmv.d.x ft5, a5 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSGNJ_D fsgnj.d fs1, fa0, fa1 -# CHECK-NEXT: 1 3 1.00 3 SMX60_FP FSGNJN_D fsgnjn.d fa1, fa3, fa4 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FEQ_H feq.h a1, fs8, fs9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLT_H flt.h a2, fs10, fs11 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLE_H fle.h a3, ft8, ft9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FEQ_S feq.s a1, fs8, fs9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLT_S flt.s a2, fs10, fs11 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLE_S fle.s a3, ft8, ft9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FEQ_D feq.d a1, fs8, fs9 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLT_D flt.d a2, fs10, fs11 -# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FLE_D fle.d a3, ft8, ft9 -# CHECK-NEXT: 1 2 1.00 2 SMX60_FP FCLASS_S fclass.s a3, ft10 -# CHECK-NEXT: 1 2 1.00 2 SMX60_FP FCLASS_S fclass.s a3, ft10 -# CHECK-NEXT: 1 2 1.00 2 SMX60_FP FCLASS_D fclass.d a3, ft10 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FADD_D fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FSUB_D fsub.d ft9, ft10, ft11 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FMUL_D fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 1 22 22.00 22 SMX60_FP[22] FDIV_D fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 1 22 22.00 22 SMX60_FP[22] FSQRT_D fsqrt.d ft6, ft7 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMIN_D fmin.d fa5, fa6, fa7 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FMAX_D fmax.d fs2, fs3, fs4 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FMADD_D fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FMSUB_D fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FNMSUB_D fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FNMADD_D fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FMV_X_H fmv.x.h a2, fs7 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FMV_H_X fmv.h.x ft1, a6 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_S_H fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_S_H 
fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_H_S fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_D_H fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_D_H fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_H_D fcvt.h.d ft2, fa2 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_W_S fcvt.w.s a0, fs5 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_WU_S fcvt.wu.s a1, fs6 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_S_W fcvt.s.w ft11, a4 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_S_WU fcvt.s.wu ft0, a5 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_L_S fcvt.l.s a0, ft0 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_LU_S fcvt.lu.s a1, ft1 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_S_L fcvt.s.l ft2, a2 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_S_LU fcvt.s.lu ft3, a3 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FMV_X_W fmv.x.w a2, fs7 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FMV_W_X fmv.w.x ft1, a6 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FSGNJ_S fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FSGNJN_S fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_WU_D fcvt.wu.d a4, ft11 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_W_D fcvt.w.d a4, ft11 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_D_W fcvt.d.w ft0, a5 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_D_WU fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_S_D fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1 4 1.00 4 SMX60_FP FCVT_D_S fcvt.d.s fs7, fs8 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_L_D fcvt.l.d a0, ft0 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU FCVT_LU_D fcvt.lu.d a1, ft1 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_D_L fcvt.d.l ft3, a3 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FCVT_D_LU fcvt.d.lu ft4, a4 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FMV_X_D fmv.x.d a2, ft2 +# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU FMV_D_X fmv.d.x ft5, a5 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FSGNJ_D fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: 1 5 1.00 5 SMX60_FP FSGNJN_D fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FEQ_H feq.h a1, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 
6 SMX60_FP FLT_H flt.h a2, fs10, fs11 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FLE_H fle.h a3, ft8, ft9 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FEQ_S feq.s a1, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FLT_S flt.s a2, fs10, fs11 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FLE_S fle.s a3, ft8, ft9 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FEQ_D feq.d a1, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FLT_D flt.d a2, fs10, fs11 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FLE_D fle.d a3, ft8, ft9 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FCLASS_S fclass.s a3, ft10 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FCLASS_S fclass.s a3, ft10 +# CHECK-NEXT: 1 6 1.00 6 SMX60_FP FCLASS_D fclass.d a3, ft10 # CHECK: Resources: # CHECK-NEXT: [0] - SMX60_FP @@ -243,7 +243,7 @@ fclass.d a3, ft10 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: 105.00 11.00 11.00 3.00 3.00 +# CHECK-NEXT: 149.00 11.00 11.00 3.00 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: @@ -256,8 +256,8 @@ fclass.d a3, ft10 # CHECK-NEXT: 1.00 - - - - fadd.h fs10, fs11, ft8 # CHECK-NEXT: 1.00 - - - - fsub.h ft9, ft10, ft11 # CHECK-NEXT: 1.00 - - - - fmul.h ft0, ft1, ft2 -# CHECK-NEXT: 7.00 - - - - fdiv.h ft3, ft4, ft5 -# CHECK-NEXT: 7.00 - - - - fsqrt.h ft6, ft7 +# CHECK-NEXT: 12.00 - - - - fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 12.00 - - - - fsqrt.h ft6, ft7 # CHECK-NEXT: 1.00 - - - - fmin.h fa5, fa6, fa7 # CHECK-NEXT: 1.00 - - - - fmax.h fs2, fs3, fs4 # CHECK-NEXT: 1.00 - - - - fmadd.h fa0, fa1, fa2, ft11 @@ -267,8 +267,8 @@ fclass.d a3, ft10 # CHECK-NEXT: 1.00 - - - - fadd.s fs10, fs11, ft8 # CHECK-NEXT: 1.00 - - - - fsub.s ft9, ft10, ft11 # CHECK-NEXT: 1.00 - - - - fmul.s ft0, ft1, ft2 -# CHECK-NEXT: 10.00 - - - - fdiv.s ft3, ft4, ft5 -# CHECK-NEXT: 10.00 - - - - fsqrt.s ft6, ft7 +# CHECK-NEXT: 15.00 - - - - fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 15.00 - - - - fsqrt.s ft6, ft7 # CHECK-NEXT: 1.00 - - - - fmin.s fa5, fa6, fa7 # CHECK-NEXT: 1.00 - - - - fmax.s fs2, fs3, fs4 
# CHECK-NEXT: 1.00 - - - - fmadd.s fa0, fa1, fa2, ft11 @@ -278,8 +278,8 @@ fclass.d a3, ft10 # CHECK-NEXT: 1.00 - - - - fadd.d fs10, fs11, ft8 # CHECK-NEXT: 1.00 - - - - fsub.d ft9, ft10, ft11 # CHECK-NEXT: 1.00 - - - - fmul.d ft0, ft1, ft2 -# CHECK-NEXT: 10.00 - - - - fdiv.d ft3, ft4, ft5 -# CHECK-NEXT: 10.00 - - - - fsqrt.d ft6, ft7 +# CHECK-NEXT: 22.00 - - - - fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 22.00 - - - - fsqrt.d ft6, ft7 # CHECK-NEXT: 1.00 - - - - fmin.d fa5, fa6, fa7 # CHECK-NEXT: 1.00 - - - - fmax.d fs2, fs3, fs4 # CHECK-NEXT: 1.00 - - - - fmadd.d fa0, fa1, fa2, ft11 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index 15d16d8bd8b4a..5b1f64bc8320f 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -228,19 +228,19 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 3 0.50 * 3 SMX60_LS SH sh t0, 0(a0) # CHECK-NEXT: 1 3 0.50 * 3 SMX60_LS SW sw t0, 0(a0) # CHECK-NEXT: 1 3 0.50 * 3 SMX60_LS SD sd t0, 0(a0) -# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU MUL mul a0, a0, a0 -# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU MULH mulh a0, a0, a0 -# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU MULHU mulhu a0, a0, a0 -# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU MULHSU mulhsu a0, a0, a0 -# CHECK-NEXT: 1 4 0.50 4 SMX60_IEU MULW mulw a0, a0, a0 -# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] DIV div a0, a1, a2 -# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] DIVU divu a0, a1, a2 -# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] REM rem a0, a1, a2 -# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] REMU remu a0, a1, a2 -# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] DIVW divw a0, a1, a2 -# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] DIVUW divuw a0, a1, a2 -# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] REMW remw a0, a1, a2 -# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] REMUW remuw a0, a1, a2 +# CHECK-NEXT: 1 6 
0.50 6 SMX60_IEU MUL mul a0, a0, a0 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULH mulh a0, a0, a0 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHU mulhu a0, a0, a0 +# CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHSU mulhsu a0, a0, a0 +# CHECK-NEXT: 1 3 0.50 3 SMX60_IEU MULW mulw a0, a0, a0 +# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] DIV div a0, a1, a2 +# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] DIVU divu a0, a1, a2 +# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] REM rem a0, a1, a2 +# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] REMU remu a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVW divw a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVUW divuw a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMW remw a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMUW remuw a0, a1, a2 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRW csrrw t0, 4095, t1 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRS csrrs s3, fflags, s5 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRC csrrc sp, 0, ra @@ -253,12 +253,12 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CZERO_NEZ czero.nez a0, a1, a2 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU ADD_UW add.uw a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SLLI_UW slli.uw a0, a0, 1 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH1ADD_UW sh1add.uw a0, a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH2ADD_UW sh2add.uw a0, a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH3ADD_UW sh3add.uw a0, a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH1ADD sh1add a0, a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH2ADD sh2add a0, a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU SH3ADD sh3add a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH1ADD_UW sh1add.uw a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH2ADD_UW sh2add.uw a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH3ADD_UW sh3add.uw a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH1ADD sh1add a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH2ADD sh2add a0, a0, a0 
+# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU SH3ADD sh3add a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU ANDN andn a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU ORN orn a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU XNOR xnor a0, a0, a0 @@ -266,8 +266,8 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CLZW clzw a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CTZ ctz a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CTZW ctzw a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CPOP cpop a0, a0 -# CHECK-NEXT: 1 1 0.50 1 SMX60_IEU CPOPW cpopw a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU CPOP cpop a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU CPOPW cpopw a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU MIN min a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU MINU minu a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU MAX max a0, a0, a0 @@ -301,7 +301,7 @@ bseti a0, a1, 1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - 179.00 43.00 5.50 5.50 +# CHECK-NEXT: - 155.00 43.00 5.50 5.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: @@ -360,14 +360,14 @@ bseti a0, a1, 1 # CHECK-NEXT: - 0.50 0.50 - - mulhu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulhsu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulw a0, a0, a0 -# CHECK-NEXT: - 20.00 - - - div a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - divu a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - rem a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - remu a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - divw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - divuw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - remw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - remuw a0, a1, a2 +# CHECK-NEXT: - 23.00 - - - div a0, a1, a2 +# CHECK-NEXT: - 23.00 - - - divu a0, a1, a2 +# CHECK-NEXT: - 23.00 - - - rem a0, a1, a2 +# CHECK-NEXT: - 23.00 - - - remu a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - divw a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - divuw a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - remw a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - remuw a0, a1, a2 # CHECK-NEXT: - 
0.50 0.50 - - csrrw t0, 4095, t1 # CHECK-NEXT: - 0.50 0.50 - - csrrs s3, fflags, s5 # CHECK-NEXT: - 0.50 0.50 - - csrrc sp, 0, ra From dbe264667c0262654e2f8b0f028b307b74703370 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sun, 27 Apr 2025 15:46:39 -0300 Subject: [PATCH 12/20] Reorder includes Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCV.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 6a6cec88b74a4..7e918e34f2471 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -52,12 +52,12 @@ include "RISCVSchedSiFive7.td" include "RISCVSchedSiFiveP400.td" include "RISCVSchedSiFiveP500.td" include "RISCVSchedSiFiveP600.td" +include "RISCVSchedSpacemitX60.td" include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" include "RISCVSchedTTAscalonD8.td" include "RISCVSchedXiangShanNanHu.td" -include "RISCVSchedSpacemitX60.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. From b73c7e0eccb3c3c214631fa68e2d66a7d6915a6b Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sun, 27 Apr 2025 16:04:11 -0300 Subject: [PATCH 13/20] Fix div/rem latencies Signed-off-by: Mikhail R. 
Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 5378a098cff3c..ef87a4353768f 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -72,9 +72,6 @@ def : WriteRes { let Latency = 3; } let Latency = 3, ReleaseAtCycles = [3] in { def : WriteRes; def : WriteRes; -} - -let Latency = 23, ReleaseAtCycles = [23] in { def : WriteRes; def : WriteRes; } From a309a291687c20fab416de0bf391899f607f3bce Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sun, 27 Apr 2025 16:16:11 -0300 Subject: [PATCH 14/20] Updated test case Signed-off-by: Mikhail R. Gadelha --- .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index b384a0187a1ce..08cab7cd359b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -307,28 +307,28 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: mul t2, a1, s1 ; RV64X60-NEXT: mul t3, a3, s1 ; RV64X60-NEXT: mul t4, a5, s1 -; RV64X60-NEXT: add s1, a0, a6 -; RV64X60-NEXT: add s0, a2, a6 +; RV64X60-NEXT: add s0, a0, a6 +; RV64X60-NEXT: add s1, a2, a6 ; RV64X60-NEXT: add t5, a4, a6 -; RV64X60-NEXT: add s2, s1, t2 +; RV64X60-NEXT: add s0, s0, t2 ; RV64X60-NEXT: csrr t2, vlenb -; RV64X60-NEXT: add t3, t3, s0 -; RV64X60-NEXT: or t6, a1, a3 +; RV64X60-NEXT: add t3, t3, s1 +; RV64X60-NEXT: li t6, 32 ; RV64X60-NEXT: add t4, t4, t5 -; RV64X60-NEXT: sltu s0, a0, t3 -; RV64X60-NEXT: sltu s1, a2, s2 -; RV64X60-NEXT: and t5, s0, s1 -; RV64X60-NEXT: slli t3, t2, 1 -; RV64X60-NEXT: slti s1, t6, 0 -; RV64X60-NEXT: sltu s0, a0, t4 -; 
RV64X60-NEXT: or t4, t5, s1 -; RV64X60-NEXT: sltu s1, a4, s2 +; RV64X60-NEXT: sltu t3, a0, t3 +; RV64X60-NEXT: sltu s1, a2, s0 +; RV64X60-NEXT: and t3, t3, s1 +; RV64X60-NEXT: or t5, a1, a3 +; RV64X60-NEXT: sltu s1, a0, t4 +; RV64X60-NEXT: sltu s0, a4, s0 +; RV64X60-NEXT: slti t4, t5, 0 ; RV64X60-NEXT: and s0, s0, s1 ; RV64X60-NEXT: or s1, a1, a5 -; RV64X60-NEXT: li t5, 32 +; RV64X60-NEXT: or t4, t3, t4 +; RV64X60-NEXT: slli t3, t2, 1 ; RV64X60-NEXT: slti s1, s1, 0 ; RV64X60-NEXT: or s0, s0, s1 -; RV64X60-NEXT: maxu s1, t3, t5 +; RV64X60-NEXT: maxu s1, t3, t6 ; RV64X60-NEXT: or s0, t4, s0 ; RV64X60-NEXT: sltu s1, a6, s1 ; RV64X60-NEXT: or s0, s0, s1 From dd5d7e012eedee8cf0f86ab49ecf78d6f8be1bc7 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Sun, 27 Apr 2025 17:05:21 -0300 Subject: [PATCH 15/20] More test updates Signed-off-by: Mikhail R. Gadelha --- .../tools/llvm-mca/RISCV/SpacemitX60/integer.s | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index 5b1f64bc8320f..4a95dd3e7fdaa 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -233,10 +233,10 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHU mulhu a0, a0, a0 # CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHSU mulhsu a0, a0, a0 # CHECK-NEXT: 1 3 0.50 3 SMX60_IEU MULW mulw a0, a0, a0 -# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] DIV div a0, a1, a2 -# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] DIVU divu a0, a1, a2 -# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] REM rem a0, a1, a2 -# CHECK-NEXT: 1 23 23.00 23 SMX60_IEU[23],SMX60_IEUA[23] REMU remu a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIV div a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVU divu a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REM 
rem a0, a1, a2 +# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMU remu a0, a1, a2 # CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVW divw a0, a1, a2 # CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVUW divuw a0, a1, a2 # CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMW remw a0, a1, a2 @@ -301,7 +301,7 @@ bseti a0, a1, 1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - 155.00 43.00 5.50 5.50 +# CHECK-NEXT: - 75.00 43.00 5.50 5.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: @@ -360,10 +360,10 @@ bseti a0, a1, 1 # CHECK-NEXT: - 0.50 0.50 - - mulhu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulhsu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulw a0, a0, a0 -# CHECK-NEXT: - 23.00 - - - div a0, a1, a2 -# CHECK-NEXT: - 23.00 - - - divu a0, a1, a2 -# CHECK-NEXT: - 23.00 - - - rem a0, a1, a2 -# CHECK-NEXT: - 23.00 - - - remu a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - div a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - divu a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - rem a0, a1, a2 +# CHECK-NEXT: - 3.00 - - - remu a0, a1, a2 # CHECK-NEXT: - 3.00 - - - divw a0, a1, a2 # CHECK-NEXT: - 3.00 - - - divuw a0, a1, a2 # CHECK-NEXT: - 3.00 - - - remw a0, a1, a2 From 7f610b22bd1f519ee037df680487379a0c567e95 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 28 Apr 2025 10:15:03 -0300 Subject: [PATCH 16/20] Swap the order so comment makes more sense Signed-off-by: Mikhail R. 
Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index ef87a4353768f..c686e2bad176f 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -63,10 +63,11 @@ def : WriteRes; def : WriteRes; // Integer multiplication -// The latency of mul is 5, while mulh, mulhsu, mulhu is 6. +def : WriteRes { let Latency = 3; } + +// The latency of mul is 5, while in mulh, mulhsu, mulhu is 6 // Worst case latency is used def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 3; } // Integer division/remainder let Latency = 3, ReleaseAtCycles = [3] in { From 7d0a715ea592c9b6b05ea86ee9a5ef97a483109e Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 28 Apr 2025 10:27:15 -0300 Subject: [PATCH 17/20] Revert back div/rem latencies Signed-off-by: Mikhail R. 
Gadelha --- .../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 5 ++- .../llvm-mca/RISCV/SpacemitX60/integer.s | 34 +++++++++---------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index c686e2bad176f..11e641d2ea17f 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -70,9 +70,12 @@ def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 6; } // Integer division/remainder -let Latency = 3, ReleaseAtCycles = [3] in { +// Worst case latency is used +let Latency = 12, ReleaseAtCycles = [12] in { def : WriteRes; def : WriteRes; +} +let Latency = 20, ReleaseAtCycles = [20] in { def : WriteRes; def : WriteRes; } diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index 4a95dd3e7fdaa..9f08d10efdf1c 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -233,14 +233,14 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHU mulhu a0, a0, a0 # CHECK-NEXT: 1 6 0.50 6 SMX60_IEU MULHSU mulhsu a0, a0, a0 # CHECK-NEXT: 1 3 0.50 3 SMX60_IEU MULW mulw a0, a0, a0 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIV div a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVU divu a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REM rem a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMU remu a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVW divw a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] DIVUW divuw a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMW remw a0, a1, a2 -# CHECK-NEXT: 1 3 3.00 3 SMX60_IEU[3],SMX60_IEUA[3] REMUW remuw a0, a1, a2 +# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] DIV div a0, a1, a2 +# CHECK-NEXT: 1 20 20.00 20 
SMX60_IEU[20],SMX60_IEUA[20] DIVU divu a0, a1, a2 +# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] REM rem a0, a1, a2 +# CHECK-NEXT: 1 20 20.00 20 SMX60_IEU[20],SMX60_IEUA[20] REMU remu a0, a1, a2 +# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] DIVW divw a0, a1, a2 +# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] DIVUW divuw a0, a1, a2 +# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] REMW remw a0, a1, a2 +# CHECK-NEXT: 1 12 12.00 12 SMX60_IEU[12],SMX60_IEUA[12] REMUW remuw a0, a1, a2 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRW csrrw t0, 4095, t1 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRS csrrs s3, fflags, s5 # CHECK-NEXT: 1 1 0.50 U 1 SMX60_IEU CSRRC csrrc sp, 0, ra @@ -301,7 +301,7 @@ bseti a0, a1, 1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - 75.00 43.00 5.50 5.50 +# CHECK-NEXT: - 179.00 43.00 5.50 5.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: @@ -360,14 +360,14 @@ bseti a0, a1, 1 # CHECK-NEXT: - 0.50 0.50 - - mulhu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulhsu a0, a0, a0 # CHECK-NEXT: - 0.50 0.50 - - mulw a0, a0, a0 -# CHECK-NEXT: - 3.00 - - - div a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - divu a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - rem a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - remu a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - divw a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - divuw a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - remw a0, a1, a2 -# CHECK-NEXT: - 3.00 - - - remuw a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - div a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - divu a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - rem a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - remu a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - divw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - divuw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - remw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - remuw a0, a1, a2 # CHECK-NEXT: - 0.50 0.50 - - csrrw t0, 4095, t1 # CHECK-NEXT: - 0.50 0.50 - - csrrs s3, fflags, s5 # 
CHECK-NEXT: - 0.50 0.50 - - csrrc sp, 0, ra From 840f37482a2b0529ab27e9607991909d7d6ce871 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 28 Apr 2025 10:44:46 -0300 Subject: [PATCH 18/20] Added tests for clmul, clmulr, clmulh Signed-off-by: Mikhail R. Gadelha --- .../test/tools/llvm-mca/RISCV/SpacemitX60/integer.s | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index 9f08d10efdf1c..8b43874499f2b 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -149,6 +149,11 @@ orc.b a0, a0 rev8 a0, a0 +# Zbc +clmul a0, a0, a0 +clmulr a0, a0, a0 +clmulh a0, a0, a0 + # Zbs bclr a0, a1, a2 bclri a0, a1, 1 @@ -283,6 +288,9 @@ bseti a0, a1, 1 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU RORIW roriw a0, a0, 1 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU ORC_B orc.b a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU REV8_RV64 rev8 a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU CLMUL clmul a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU CLMULR clmulr a0, a0, a0 +# CHECK-NEXT: 1 2 0.50 2 SMX60_IEU CLMULH clmulh a0, a0, a0 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU BCLR bclr a0, a1, a2 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU BCLRI bclri a0, a1, 1 # CHECK-NEXT: 1 1 0.50 1 SMX60_IEU BEXT bext a0, a1, a2 @@ -301,7 +309,7 @@ bseti a0, a1, 1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - 179.00 43.00 5.50 5.50 +# CHECK-NEXT: - 180.50 44.50 5.50 5.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: @@ -410,6 +418,9 @@ bseti a0, a1, 1 # CHECK-NEXT: - 0.50 0.50 - - roriw a0, a0, 1 # CHECK-NEXT: - 0.50 0.50 - - orc.b a0, a0 # CHECK-NEXT: - 0.50 0.50 - - rev8 a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - clmul a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - clmulr a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - clmulh a0, a0, a0 # CHECK-NEXT: 
- 0.50 0.50 - - bclr a0, a1, a2 # CHECK-NEXT: - 0.50 0.50 - - bclri a0, a1, 1 # CHECK-NEXT: - 0.50 0.50 - - bext a0, a1, a2 From 8030c889724bb13784e1461aab5d3e0af5124bb0 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 28 Apr 2025 11:45:20 -0300 Subject: [PATCH 19/20] Add comment about div/rem Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 11e641d2ea17f..e52e2b36ae1d4 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -70,7 +70,8 @@ def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 6; } // Integer division/remainder -// Worst case latency is used +// TODO: our experiments show that the latency of div is 4, which +// seems too low. We used the worst case latency from the C908 instead. let Latency = 12, ReleaseAtCycles = [12] in { def : WriteRes; def : WriteRes; From a55f7275ee39d3b8c29a883b3ca15f4dc8fd0834 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 28 Apr 2025 12:14:02 -0300 Subject: [PATCH 20/20] Fix comment Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index e52e2b36ae1d4..c21ab969d12ac 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -70,8 +70,8 @@ def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 6; } // Integer division/remainder -// TODO: our experiments show that the latency of div is 4, which -// seems too low. We used the worst case latency from the C908 instead. +// TODO: Latency set based on C908 datasheet and hasn't been +// confirmed experimentally. 
let Latency = 12, ReleaseAtCycles = [12] in { def : WriteRes; def : WriteRes;