-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][llvm-mca] Use Sched*MC for Zvk MC instructions and add Zvk tests for P600 #89256
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-tools-llvm-mca Author: Michael Maitland (michaelmaitland) Changes: This patch adds a commit to use Sched*MC for Zvk MC instructions. Then, there are a series of commits that add llvm-mca tests for P600 for each Zvk extension. Without the initial commit to use Sched*MC, many of these extensions did not work with llvm-mca because they contained instructions that were missing scheduling information entirely. This patch demonstrates that llvm-mca works for Zvk instructions. This commit will serve as a pre-commit to patches that make sure that the P600 Zvk instructions have the correct behavior. For now, the llvm-mca tests just show the current behavior, even if that isn't the intended behavior long term. By adding tests first, it will highlight just the behavior diff. Patch is 114.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89256.diff 8 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index aac7dc444a2de3..84c4ae859ef3cb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -24,11 +24,9 @@ def tuimm5 : RISCVOp, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
multiclass VCLMUL_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # "." # "vv">,
- Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">;
def X : VALUVX<funct6, OPMVX, opcodestr # "." # "vx">,
- Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">;
}
class RVInstIVI_VROR<bits<6> funct6, dag outs, dag ins, string opcodestr,
@@ -57,13 +55,13 @@ multiclass VROR_IV_V_X_I<string opcodestr, bits<6> funct6>
def I : RVInstIVI_VROR<funct6, (outs VR:$vd),
(ins VR:$vs2, uimm6:$imm, VMaskOp:$vm),
opcodestr # ".vi", "$vd, $vs2, $imm$vm">,
- Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">;
}
// op vd, vs2, vs1
class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : VALUVVNoVm<funct6, opv, opcodestr> {
+ : VALUVVNoVm<funct6, opv, opcodestr>,
+ SchedUnaryMC<"WriteVIALUI", "ReadVIALUV"> {
let Inst{6-0} = OPC_OP_VE.Value;
}
@@ -71,7 +69,8 @@ class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
class PALUVVNoVmTernary<bits<6> funct6, RISCVVFormat opv, string opcodestr>
: RVInstVV<funct6, opv, (outs VR:$vd_wb),
(ins VR:$vd, VR:$vs2, VR:$vs1),
- opcodestr, "$vd, $vs2, $vs1"> {
+ opcodestr, "$vd, $vs2, $vs1">,
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_VE.Value;
@@ -79,7 +78,8 @@ class PALUVVNoVmTernary<bits<6> funct6, RISCVVFormat opv, string opcodestr>
// op vd, vs2, imm
class PALUVINoVm<bits<6> funct6, string opcodestr, Operand optype>
- : VALUVINoVm<funct6, opcodestr, optype> {
+ : VALUVINoVm<funct6, opcodestr, optype>,
+ SchedUnaryMC<"WriteVIALUV", "ReadVIALUV"> {
let Inst{6-0} = OPC_OP_VE.Value;
let Inst{14-12} = OPMVV.Value;
}
@@ -88,7 +88,8 @@ class PALUVINoVm<bits<6> funct6, string opcodestr, Operand optype>
class PALUVINoVmBinary<bits<6> funct6, string opcodestr, Operand optype>
: RVInstIVI<funct6, (outs VR:$vd_wb),
(ins VR:$vd, VR:$vs2, optype:$imm),
- opcodestr, "$vd, $vs2, $imm"> {
+ opcodestr, "$vd, $vs2, $imm">,
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_VE.Value;
@@ -100,7 +101,8 @@ class PALUVINoVmBinary<bits<6> funct6, string opcodestr, Operand optype>
class PALUVs2NoVmBinary<bits<6> funct6, bits<5> vs1, RISCVVFormat opv,
string opcodestr>
: RVInstV<funct6, vs1, opv, (outs VR:$vd_wb), (ins VR:$vd, VR:$vs2),
- opcodestr, "$vd, $vs2"> {
+ opcodestr, "$vd, $vs2">,
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV"> {
let Constraints = "$vd = $vd_wb";
let vm = 1;
let Inst{6-0} = OPC_OP_VE.Value;
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
new file mode 100644
index 00000000000000..cee5c390d7c54b
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
@@ -0,0 +1,472 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e32, mf8, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, mf4, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, mf2, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, m1, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, m2, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, m4, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+vsetvli zero, zero, e32, m8, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+# Show SEW does not matter
+vsetvli zero, zero, e64, m4, tu, mu
+vandn.vv v4, v8, v12
+vandn.vx v4, v8, a0
+vbrev.v v4, v8
+vbrev8.v v4, v8
+vrev8.v v4, v8
+vclz.v v4, v8
+vctz.v v4, v8
+vcpop.v v4, v8
+vrol.vv v4, v8, v12
+vrol.vx v4, v8, a0
+vror.vv v4, v8, v12
+vror.vx v4, v8, a0
+vror.vi v4, v8, 8
+
+vwsll.vv v4, v8, v12
+vwsll.vx v4, v8, a0
+vwsll.vi v4, v8, 8
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 136
+# CHECK-NEXT: Total Cycles: 182
+# CHECK-NEXT: Total uOps: 136
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.75
+# CHECK-NEXT: Block RThroughput: 176.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 1 1.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 1.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vclz.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vctz.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 1.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 1.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 1.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 1.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 1.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 1.00 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 2.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 2.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 2.00 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 1 4.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 4.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 4.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vclz.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vctz.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 4.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 4.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 4.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 4.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 4.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 4.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 4.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 4.00 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 6 2.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 6 2.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 6 2.00 vwsll.vi v4, v8, 8
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SiFiveP600Div
+# CHECK-NEXT: [1] - SiFiveP600FEXQ0
+# CHECK-NEXT: [2] - SiFiveP600FEXQ1
+# CHECK-NEXT: [3] - SiFiveP600FloatDiv
+# CHECK-NEXT: [4] - SiFiveP600IEXQ0
+# CHECK-NEXT: [5] - SiFiveP600IEXQ1
+# CHECK-NEXT: [6] - SiFiveP600IEXQ2
+# CHECK-NEXT: [7] - SiFiveP600IEXQ3
+# CHECK-NEXT: [8.0] - SiFiveP600LDST
+# CHECK-NEXT: [8.1] - SiFiveP600LDST
+# CHECK-NEXT: [9] - SiFiveP600VDiv
+# CHECK-NEXT: [10] - SiFiveP600VEXQ0
+# CHECK-NEXT: [11] - SiFiveP600VEXQ1
+# CHECK-NEXT: [12] - SiFiveP600VFloatDiv
+# CHECK-NEXT: [13] - SiFiveP600VLD
+# CHECK-NEXT: [14] - SiFiveP600VST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
+# CHECK-NEXT: - - - - 8.00 - - - - - - 176.00 176.00 - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - ...
[truncated]
|
} | ||
|
||
// op vd, vs2, vs1 | ||
class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> | ||
: VALUVVNoVm<funct6, opv, opcodestr> { | ||
: VALUVVNoVm<funct6, opv, opcodestr>, | ||
SchedUnaryMC<"WriteVIALUI", "ReadVIALUV"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is inconsistent with how this is done in RISCVInstrInfoV.td. PALUVVNoVm is intended to be a special version of VALUVVNoVm
with a different opcode. Since VALUVVNoVm doesn't contain a Sched*MC, PALUVVNoVm
shouldn't either.
Just want to make sure, are the rest of the 7 commits here all pre-commit tests? What about the changes on these tests after Sched*MC is applied? |
llvm-mca will error out if Sched*MC is not applied. I don't want to commit these tests before the Sched*MC patch. My plan is to squash everything in this patch into one commit, unless others have a different preference.
@@ -71,7 +68,8 @@ class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> | |||
class PALUVVNoVmTernary<bits<6> funct6, RISCVVFormat opv, string opcodestr> | |||
: RVInstVV<funct6, opv, (outs VR:$vd_wb), | |||
(ins VR:$vd, VR:$vs2, VR:$vs1), | |||
opcodestr, "$vd, $vs2, $vs1"> { | |||
opcodestr, "$vd, $vs2, $vs1">, | |||
SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", "ReadVIALUV"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this is the right layer for this either. You're probably going to need different classes for VGHSH and SHA2 in the future.
8b3bd76
to
0eaece0
Compare
@@ -88,7 +85,8 @@ class PALUVINoVm<bits<6> funct6, string opcodestr, Operand optype> | |||
class PALUVINoVmBinary<bits<6> funct6, string opcodestr, Operand optype> | |||
: RVInstIVI<funct6, (outs VR:$vd_wb), | |||
(ins VR:$vd, VR:$vs2, optype:$imm), | |||
opcodestr, "$vd, $vs2, $imm"> { | |||
opcodestr, "$vd, $vs2, $imm">, | |||
SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The same reasoning applies to this one and PALUVs2NoVmBinary
. Sorry I didn't say it before.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PALUVs2NoVmBinary was not addressed.
0eaece0
to
b6ae512
Compare
vror.vx v4, v8, a0 | ||
vror.vi v4, v8, 8 | ||
|
||
vwsll.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't a valid instruction. It would widen to LMUL=16
vwsll.vi v4, v8, 8 | ||
|
||
vsetvli zero, zero, e32, m8, tu, mu | ||
vandn.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't a legal instruction. The registers aren't aligned to a multiple of 8 as required by LMUL=8.
vror.vx v4, v8, a0 | ||
vror.vi v4, v8, 8 | ||
|
||
vwsll.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't legal, the destination register is LMUL=8, but the register number is not a multiple of 8.
vror.vx v4, v8, a0 | ||
vror.vi v4, v8, 8 | ||
|
||
vwsll.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't legal, the destination elements would have sew=128 since its widening from e64.
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s | ||
|
||
vsetvli zero, zero, e32, mf8, tu, mu |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment as zvbb.s
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CLMUL only supports SEW=64
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My bad, I read "SEW is any value other than 64" as what was required, instead of what was reserved.
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s | ||
|
||
vsetvli zero, zero, e32, mf8, tu, mu |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
e32 is still not valid with mf8
vaesz.vs v4, v8 | ||
|
||
vsetvli zero, zero, e32, m8, tu, mu | ||
vaesef.vv v4, v8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned for LMUL=8
vgmul.vv v4, v8 | ||
|
||
vsetvli zero, zero, e32, m8, tu, mu | ||
vghsh.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned for LMUL=8
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s | ||
|
||
vsetvli zero, zero, e32, mf8, tu, mu |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
e32 is not supported for mf8
vsha2cl.vv v4, v8, v12 | ||
|
||
vsetvli zero, zero, e64, m8, tu, mu | ||
vsha2ms.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned
vsha2cl.vv v4, v8, v12 | ||
|
||
vsetvli zero, zero, e64, mf8, tu, mu | ||
vsha2ms.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned for LMUL=8
vsm4r.vs v4, v8 | ||
|
||
vsetvli zero, zero, e32, m8, tu, mu | ||
vsm4k.vi v4, v8, 8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned
vsm3c.vi v4, v8, 8 | ||
|
||
vsetvli zero, zero, e32, m8, tu, mu | ||
vsm3me.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers are not aligned
4ed7753
to
ec9ff50
Compare
vsha2ms.vv v4, v8, v12 | ||
vsha2ch.vv v4, v8, v12 | ||
vsha2cl.vv v4, v8, v12 | ||
# SEW is only e8 or e64 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
e32 or e64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu | ||
# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12 | ||
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ms.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not directly related to this patch. Do we need to have multiple uops defined to get llvm-mca to distribute the pressure across both ALUs?
vclmulh.vx v4, v8, a0 | ||
|
||
vsetvli zero, zero, e64, m8, tu, mu | ||
vclmul.vv v4, v8, v12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registers not aligned.
I think we really need a tool to verify MCInst level RVV instructions. :-) |
b1b69b8
to
f27c8d6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The vector crypto instructions may have different scheduling behavior compared to VALU operations. Instead of using scheduling resources that describe VALU operations, we give these instructions their own scheduling resources. This is similar to what we did for Zb* instructions. The sifive-p670 has vector crypto, so we model behavior for these instructions in the P600SchedModel. The numbers are based off of measurements collected internally. These numbers are a bit old and new measurements show that they may not be fully accurate. It is likely that we will refine these numbers in one or more follow-up patches based on new measurements. This PR is stacked on #89256.
This patch adds a commit to use Sched*MC for Zvk MC instructions. Then, there are a series of commits that add llvm-mca tests for P600 for each Zvk extension. Without the initial commit to use Sched*MC, many of these extensions did not work with llvm-mca because they contained instructions that were missing scheduling information entirely. This patch demonstrates that llvm-mca works for Zvk instructions.
This commit will serve as a pre-commit to patches that make sure that the P600 Zvk instructions have the correct behavior. For now, the llvm-mca tests just show the current behavior, even if that isn't the intended behavior long term. By adding tests first, it will highlight just the behavior diff.
I plan to squash the commits here into one unless preferred otherwise.