Skip to content

Conversation

mshockwave
Copy link
Member

Vector-to-scalar movement instructions, as well as mask instructions like vcpop and vfirst, should have a higher latency & occupancy on SiFive7.

…t instructions

Co-Authored-By: Michael Maitland <michaeltmaitland@gmail.com>
@llvmbot
Copy link
Member

llvmbot commented Sep 22, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

Vector-to-scalar movement instructions, as well as mask instructions like vcpop and vfirst, should have a higher latency & occupancy on SiFive7.


Patch is 76.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160155.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVSchedSiFive7.td (+14-4)
  • (added) llvm/test/tools/llvm-mca/RISCV/SiFive7/mask.s (+125)
  • (added) llvm/test/tools/llvm-mca/RISCV/SiFive7/vmv.s (+748)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index d81718c2361de..f2c946347923d 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -929,16 +929,16 @@ multiclass SiFive7WriteResBase<int VLEN,
   }
 
   // 15. Vector Mask Instructions
+  // Simple mask logical
   foreach mx = SchedMxList in {
     defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
     let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
       defm : LMULWriteResMX<"WriteVMALUV", [VCQ, VA1], mx, IsWorstCase>;
-      defm : LMULWriteResMX<"WriteVMPopV", [VCQ, VA1], mx, IsWorstCase>;
-      defm : LMULWriteResMX<"WriteVMFFSV", [VCQ, VA1], mx, IsWorstCase>;
       defm : LMULWriteResMX<"WriteVMSFSV", [VCQ, VA1], mx, IsWorstCase>;
     }
   }
+  // Simple mask logical used in series
   foreach mx = SchedMxList in {
     defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -947,13 +947,23 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVIdxV", [VCQ, VA1], mx, IsWorstCase>;
     }
   }
+  // Mask reduction
+  foreach mx = SchedMxList in {
+    defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+    let Latency = 11, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 11)] in {
+      defm "" : LMULWriteResMX<"WriteVMFFSV", [VCQ, VA1], mx, IsWorstCase>;
+      defm "" : LMULWriteResMX<"WriteVMPopV", [VCQ, VA1], mx, IsWorstCase>;
+    }
+  }
 
   // 16. Vector Permutation Instructions
+  let Latency = 11, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 11)] in {
+    def : WriteRes<WriteVMovXS, [VCQ, VA1]>;
+    def : WriteRes<WriteVMovFS, [VCQ, VA1]>;
+  }
   let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
     def : WriteRes<WriteVMovSX, [VCQ, VA1OrVA2]>;
-    def : WriteRes<WriteVMovXS, [VCQ, VA1]>;
     def : WriteRes<WriteVMovSF, [VCQ, VA1OrVA2]>;
-    def : WriteRes<WriteVMovFS, [VCQ, VA1]>;
   }
   foreach mx = SchedMxList in {
     defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/mask.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/mask.s
new file mode 100644
index 0000000000000..6f46aa2eb959f
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/mask.s
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 -instruction-tables=full < %s | FileCheck %s
+
+vsetvli zero, zero, e32, m1, ta, ma
+
+vmslt.vv v0, v4, v20
+vmsle.vv v8, v4, v20
+vmsgt.vv v8, v20, v4
+vmsge.vv v8, v20, v4
+vmseq.vv v8, v4, v20
+vmsne.vv v8, v4, v20
+vmsltu.vv v8, v4, v20
+vmsleu.vv v8, v4, v20
+vmsgtu.vv v8, v20, v4
+vmsgeu.vv v8, v20, v4
+
+vmflt.vv v0, v4, v20
+vmfle.vv v8, v4, v20
+vmfgt.vv v8, v20, v4
+vmfge.vv v8, v20, v4
+vmfeq.vv v8, v4, v20
+vmfne.vv v8, v4, v20
+
+vmadc.vv v8, v4, v20
+vmsbc.vv v8, v4, v20
+
+vfirst.m a2, v4
+vpopc.m a2, v4
+
+viota.m v8, v4
+
+vmsbf.m v8, v4
+vmsif.m v8, v4
+vmsof.m v8, v4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - VLEN512SiFive7FDiv:1
+# CHECK-NEXT: [1]   - VLEN512SiFive7IDiv:1
+# CHECK-NEXT: [2]   - VLEN512SiFive7PipeA:1
+# CHECK-NEXT: [3]   - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
+# CHECK-NEXT: [4]   - VLEN512SiFive7PipeB:1
+# CHECK-NEXT: [5]   - VLEN512SiFive7VA:1
+# CHECK-NEXT: [6]   - VLEN512SiFive7VCQ:1
+# CHECK-NEXT: [7]   - VLEN512SiFive7VL:1
+# CHECK-NEXT: [8]   - VLEN512SiFive7VS:1
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+# CHECK-NEXT: [7]: Bypass Latency
+# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
+# CHECK-NEXT: [9]: LLVM Opcode Name
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]                                        [9]                        Instructions:
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m1, ta, ma
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLT_VV                   vmslt.vv	v0, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLE_VV                   vmsle.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLT_VV                   vmslt.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLE_VV                   vmsle.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSEQ_VV                   vmseq.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSNE_VV                   vmsne.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLTU_VV                  vmsltu.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLEU_VV                  vmsleu.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLTU_VV                  vmsltu.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSLEU_VV                  vmsleu.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFLT_VV                   vmflt.vv	v0, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFLE_VV                   vmfle.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFLT_VV                   vmflt.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFLE_VV                   vmfle.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFEQ_VV                   vmfeq.vv	v8, v4, v20
+# CHECK-NEXT:  1      5     2.00                         5     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMFNE_VV                   vmfne.vv	v8, v4, v20
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMADC_VV                   vmadc.vv	v8, v4, v20
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMSBC_VV                   vmsbc.vv	v8, v4, v20
+# CHECK-NEXT:  1      11    11.00                        11    VLEN512SiFive7VA[1,12],VLEN512SiFive7VCQ   VFIRST_M                   vfirst.m	a2, v4
+# CHECK-NEXT:  1      11    11.00                        11    VLEN512SiFive7VA[1,12],VLEN512SiFive7VCQ   VCPOP_M                    vcpop.m	a2, v4
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VIOTA_M                    viota.m	v8, v4
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VMSBF_M                    vmsbf.m	v8, v4
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VMSIF_M                    vmsif.m	v8, v4
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VMSOF_M                    vmsof.m	v8, v4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - VLEN512SiFive7FDiv
+# CHECK-NEXT: [1]   - VLEN512SiFive7IDiv
+# CHECK-NEXT: [2]   - VLEN512SiFive7PipeA
+# CHECK-NEXT: [3]   - VLEN512SiFive7PipeB
+# CHECK-NEXT: [4]   - VLEN512SiFive7VA
+# CHECK-NEXT: [5]   - VLEN512SiFive7VCQ
+# CHECK-NEXT: [6]   - VLEN512SiFive7VL
+# CHECK-NEXT: [7]   - VLEN512SiFive7VS
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
+# CHECK-NEXT:  -      -     1.00    -     87.00  24.00   -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, ta, ma
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmslt.vv	v0, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsle.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmslt.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsle.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmseq.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsne.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsltu.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsleu.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsltu.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsleu.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmflt.vv	v0, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmfle.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmflt.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmfle.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmfeq.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmfne.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmadc.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vmsbc.vv	v8, v4, v20
+# CHECK-NEXT:  -      -      -      -     12.00  1.00    -      -     vfirst.m	a2, v4
+# CHECK-NEXT:  -      -      -      -     12.00  1.00    -      -     vcpop.m	a2, v4
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     viota.m	v8, v4
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vmsbf.m	v8, v4
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vmsif.m	v8, v4
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vmsof.m	v8, v4
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/vmv.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vmv.s
new file mode 100644
index 0000000000000..36cabc54ce7a0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vmv.s
@@ -0,0 +1,748 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -instruction-tables=full -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv8r.v v8, v16
+
+vsetvli zero, zero, e64, m1, tu, mu
+vmv.s.x v8, x5
+vmv.x.s x7, v16
+
+vsetvli zero, zero, e64, m2, tu, mu
+vmv.s.x v8, x5
+vmv.x.s x7, v16
+
+vsetvli zero, zero, e64, m4, tu, mu
+vmv.s.x v8, x5
+vmv.x.s x7, v16
+
+vsetvli zero, zero, e64, m8, tu, mu
+vmv.s.x v8, x5
+vmv.x.s x7, v16
+
+vsetvli zero, zero, e64, m1, tu, mu
+vfmv.s.f v8, f5
+vfmv.f.s f7, v16
+
+vsetvli zero, zero, e64, m2, tu, mu
+vfmv.s.f v8, f5
+vfmv.f.s f7, v16
+
+vsetvli zero, zero, e64, m4, tu, mu
+vfmv.s.f v8, f5
+vfmv.f.s f7, v16
+
+vsetvli zero, zero, e64, m8, tu, mu
+vfmv.s.f v8, f5
+vfmv.f.s f7, v16
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - VLEN512SiFive7FDiv:1
+# CHECK-NEXT: [1]   - VLEN512SiFive7IDiv:1
+# CHECK-NEXT: [2]   - VLEN512SiFive7PipeA:1
+# CHECK-NEXT: [3]   - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
+# CHECK-NEXT: [4]   - VLEN512SiFive7PipeB:1
+# CHECK-NEXT: [5]   - VLEN512SiFive7VA:1
+# CHECK-NEXT: [6]   - VLEN512SiFive7VCQ:1
+# CHECK-NEXT: [7]   - VLEN512SiFive7VL:1
+# CHECK-NEXT: [8]   - VLEN512SiFive7VS:1
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+# CHECK-NEXT: [7]: Bypass Latency
+# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
+# CHECK-NEXT: [9]: LLVM Opcode Name
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]                                        [9]                        Instructions:
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMV1R_V                    vmv1r.v	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMV1R_V                    vmv1r.v	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMV1R_V                    vmv1r.v	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMV1R_V                    vmv1r.v	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VMV1R_V                    vmv1r.v	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1    ...
[truncated]

@mshockwave mshockwave marked this pull request as draft September 22, 2025 18:03
@mshockwave mshockwave marked this pull request as ready for review September 22, 2025 23:59
@mshockwave mshockwave merged commit fa240f2 into llvm:main Sep 23, 2025
11 checks passed
@mshockwave mshockwave deleted the patch/rvv/sifive-7-sched-red-permutation branch September 23, 2025 21:32
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants