From 90c582021bf6cb897b6a355b96f0a135e23aa7df Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 23 Apr 2024 09:32:14 -0700 Subject: [PATCH] [RISCV] Add Sched classes for vector crypto instructions The vector crypto instructions may have different scheduling behavior compared to VALU operations. Instead of using scheduling resources that describe VALU operations, we give these instructions their own scheduling resources. This is similar to what we did for Zb* instructions. The sifive-p670 has vector crypto, so we model behavior for these instructions in the P600SchedModel. The numbers are based off of measurements collected internally. These numbers are a bit old, and new measurements show that they may not be fully accurate. It is likely that we will refine these numbers in follow-up patches based on new measurements. --- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 144 ++++++++++--- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 1 + llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 81 +++++++ .../Target/RISCV/RISCVSchedSyntacoreSCR1.td | 1 + .../Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 + llvm/lib/Target/RISCV/RISCVSchedule.td | 1 + llvm/lib/Target/RISCV/RISCVScheduleZvk.td | 202 ++++++++++++++++++ .../tools/llvm-mca/RISCV/SiFiveP600/zvbb.s | 142 ++++++------ .../tools/llvm-mca/RISCV/SiFiveP600/zvbc.s | 38 ++-- .../tools/llvm-mca/RISCV/SiFiveP600/zvkg.s | 18 +- 12 files changed, 498 insertions(+), 133 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVScheduleZvk.td diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 94be3b72ffc264..d60aea186aa5fe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -24,9 +24,9 @@ def tuimm5 : RISCVOp, TImmLeaf(Imm);}]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { multiclass VCLMUL_MV_V_X funct6> { 
def V : VALUVV, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVCLMUL", "ReadVCLMUL", "ReadVCLMUL">; def X : VALUVX, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVCLMULH", "ReadVCLMULH", "ReadVCLMULH">; } class RVInstIVI_VROR funct6, dag outs, dag ins, string opcodestr, @@ -55,7 +55,7 @@ multiclass VROR_IV_V_X_I funct6> def I : RVInstIVI_VROR, - SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; + SchedUnaryMC<"WriteVROR", "ReadVROR">; } // op vd, vs2, vs1 @@ -107,10 +107,10 @@ multiclass VAES_MV_V_S funct6_vv, bits<6> funct6_vs, bits<5> vs1, RISCVVFormat opv, string opcodestr> { let RVVConstraint = NoConstraint in def NAME # _VV : PALUVs2NoVmBinary, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESMV", "ReadVAESMV", "ReadVAESMV">; let RVVConstraint = VS2Constraint in def NAME # _VS : PALUVs2NoVmBinary, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESMV", "ReadVAESMV", "ReadVAESMV">; } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -142,22 +142,22 @@ let Predicates = [HasStdExtZvkb] in { let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in { def VGHSH_VV : PALUVVNoVmTernary<0b101100, OPMVV, "vghsh.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVGHSH", "ReadVGHSH", "ReadVGHSH", + "ReadVGHSH">; def VGMUL_VV : PALUVs2NoVmBinary<0b101000, 0b10001, OPMVV, "vgmul.vv">, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVGMUL", "ReadVGMUL", "ReadVGMUL">; } // Predicates = [HasStdExtZvkg] let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = Sha2Constraint in { def VSHA2CH_VV : PALUVVNoVmTernary<0b101110, OPMVV, "vsha2ch.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2CH", "ReadVSHA2CH", "ReadVSHA2CH", + "ReadVSHA2CH">; def VSHA2CL_VV : 
PALUVVNoVmTernary<0b101111, OPMVV, "vsha2cl.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2CL", "ReadVSHA2CL", "ReadVSHA2CL", + "ReadVSHA2CL">; def VSHA2MS_VV : PALUVVNoVmTernary<0b101101, OPMVV, "vsha2ms.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2MS", "ReadVSHA2MS", "ReadVSHA2MS", + "ReadVSHA2MS">; } // Predicates = [HasStdExtZvknhaOrZvknhb] let Predicates = [HasStdExtZvkned] in { @@ -166,26 +166,26 @@ let Predicates = [HasStdExtZvkned] in { defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">; defm VAESEM : VAES_MV_V_S<0b101000, 0b101001, 0b00010, OPMVV, "vaesem">; def VAESKF1_VI : PALUVINoVm<0b100010, "vaeskf1.vi", uimm5>, - SchedUnaryMC<"WriteVIALUV", "ReadVIALUV">; + SchedUnaryMC<"WriteVAESKF1", "ReadVAESKF1">; def VAESKF2_VI : PALUVINoVmBinary<0b101010, "vaeskf2.vi", uimm5>, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESKF2", "ReadVAESKF2", "ReadVAESKF2">; let RVVConstraint = VS2Constraint in def VAESZ_VS : PALUVs2NoVmBinary<0b101001, 0b00111, OPMVV, "vaesz.vs">, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESZ", "ReadVAESZ", "ReadVAESZ">; } // Predicates = [HasStdExtZvkned] let Predicates = [HasStdExtZvksed] in { let RVVConstraint = NoConstraint in def VSM4K_VI : PALUVINoVm<0b100001, "vsm4k.vi", uimm5>, - SchedUnaryMC<"WriteVIALUV", "ReadVIALUV">; + SchedUnaryMC<"WriteVSM4K", "ReadVSM4K">; defm VSM4R : VAES_MV_V_S<0b101000, 0b101001, 0b10000, OPMVV, "vsm4r">; } // Predicates = [HasStdExtZvksed] let Predicates = [HasStdExtZvksh], RVVConstraint = VS2Constraint in { def VSM3C_VI : PALUVINoVmBinary<0b101011, "vsm3c.vi", uimm5>, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVSM3C", "ReadVSM3C", "ReadVSM3C">; def VSM3ME_VV : PALUVVNoVm<0b100000, OPMVV, "vsm3me.vv">, - SchedUnaryMC<"WriteVIALUI", 
"ReadVIALUV">; + SchedUnaryMC<"WriteVSM3ME", "ReadVSM3ME">; } // Predicates = [HasStdExtZvksh] //===----------------------------------------------------------------------===// @@ -337,10 +337,10 @@ multiclass VPseudoVCLMUL_VV_VX { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + SchedBinary<"WriteVCLMUL", "ReadVCLMUL", "ReadVCLMUL", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX, - SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx, + SchedBinary<"WriteVCLMULH", "ReadVCLMULH", "ReadVCLMULH", mx, forceMergeOpRead=true>; } } @@ -354,28 +354,104 @@ multiclass VPseudoUnaryV_V { } } -multiclass VPseudoVALU_V { +multiclass VPseudoVBREV { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoUnaryV_V, - SchedUnary<"WriteVIALUV", "ReadVIALUV", mx, - forceMergeOpRead=true>; + SchedUnary<"WriteVBREV", "ReadVBREV", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVCLZ { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCLZ", "ReadVCLZ", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVCTZ { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCTZ", "ReadVCTZ", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVCPOP { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCPOP", "ReadVCPOP", mx, forceMergeOpRead=true>; } } multiclass VPseudoVWALU_VV_VX_VI : VPseudoVWALU_VV_VX { foreach m = MxListW in { defm "" : VPseudoBinaryW_VI, - SchedUnary<"WriteVIWALUV", "ReadVIWALUV", m.MX, + SchedUnary<"WriteVWSLL", "ReadVWSLL", m.MX, forceMergeOpRead=true>; } } +multiclass VPseudoVANDN { + foreach m = MxList in { + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVANDN", "ReadVANDN", "ReadVANDN", m.MX, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVANDN", "ReadVANDN", "ReadVANDN", m.MX, + 
forceMergeOpRead=true>; + } +} + +multiclass VPseudoVBREV8 { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVBREV8", "ReadVBREV8", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVREV8 { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVREV8", "ReadVREV8", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVROL { + foreach m = MxList in { + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVROL", "ReadVROL", "ReadVROL", m.MX, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVROL", "ReadVROL", "ReadVROL", m.MX, + forceMergeOpRead=true>; + } +} + +multiclass VPseudoVROR { + defvar Constraint = ""; + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVROR", "ReadVROR", "ReadVROR", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVROR", "ReadVROR", "ReadVROR", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VI, + SchedUnary<"WriteVROR", "ReadVROR", mx, forceMergeOpRead=true>; + } +} + let Predicates = [HasStdExtZvbb] in { - defm PseudoVBREV : VPseudoVALU_V; - defm PseudoVCLZ : VPseudoVALU_V; - defm PseudoVCTZ : VPseudoVALU_V; - defm PseudoVCPOP : VPseudoVALU_V; + defm PseudoVBREV : VPseudoVBREV; + defm PseudoVCLZ : VPseudoVCLZ; + defm PseudoVCTZ : VPseudoVCTZ; + defm PseudoVCPOP : VPseudoVCPOP; defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI; } // Predicates = [HasStdExtZvbb] @@ -385,10 +461,10 @@ let Predicates = [HasStdExtZvbc] in { } // Predicates = [HasStdExtZvbc] let Predicates = [HasStdExtZvkb] in { - defm PseudoVANDN : VPseudoVALU_VV_VX; - defm PseudoVBREV8 : VPseudoVALU_V; - defm PseudoVREV8 : VPseudoVALU_V; - defm PseudoVROL : VPseudoVALU_VV_VX; + defm PseudoVANDN : VPseudoVANDN; + defm PseudoVBREV8 : VPseudoVBREV8; + defm PseudoVREV8 : VPseudoVREV8; + defm PseudoVROL : VPseudoVROL; defm PseudoVROR : VPseudoVALU_VV_VX_VI; } // Predicates = 
[HasStdExtZvkb] diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 65494e73758d63..9ddc4281092dd1 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -262,4 +262,5 @@ defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index a532066b3a1c83..e67da839bdb876 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1298,4 +1298,5 @@ defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index fccdd7e4f3ec2e..a37958826e028a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -367,4 +367,5 @@ defm : UnsupportedSchedSFB; defm : UnsupportedSchedZfa; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 6e4fb19361f553..cf81101401af84 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -748,6 +748,60 @@ foreach mx = SchedMxList in { } } +// Vector Crypto +foreach mx = SchedMxList in { + defvar LMulLat = SiFiveP600GetLMulCycles.c; + defvar IsWorstCase = SiFiveP600IsWorstCaseMX.c; + // Zvbb + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVBREV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLZ", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCPOP", [SiFiveP600VectorArith], 
mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCTZ", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLL", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvbc + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVCLMUL", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLMULH", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkb + let Latency = 1, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVANDN", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVBREV8", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVREV8", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVROL", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVROR", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkg + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVGHSH", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVGMUL", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // ZvknhaOrZvknhb + let Latency = 3, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVSHA2CH", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2CL", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2MS", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkned + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVAESMV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF1", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF2", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + let Latency = 1, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVAESZ", [SiFiveP600VectorArith], mx, IsWorstCase>; + // Zvksed and Zvksh 
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVSM4K", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM4R", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM3C", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM3ME", [SiFiveP600VEXQ0], mx, IsWorstCase>; + } +} + // Others def : WriteRes; def : WriteRes; @@ -1032,6 +1086,33 @@ foreach mx = SchedMxList in { def : ReadAdvance("ReadVMergeOp_" # mx # "_E" # sew), 0>; } +// Vector Crypto Extensions +defm "" : LMULReadAdvance<"ReadVBREV", 0>; +defm "" : LMULReadAdvance<"ReadVCLZ", 0>; +defm "" : LMULReadAdvance<"ReadVCPOP", 0>; +defm "" : LMULReadAdvance<"ReadVCTZ", 0>; +defm "" : LMULReadAdvance<"ReadVWSLL", 0>; +defm "" : LMULReadAdvance<"ReadVCLMUL", 0>; +defm "" : LMULReadAdvance<"ReadVCLMULH", 0>; +defm "" : LMULReadAdvance<"ReadVANDN", 0>; +defm "" : LMULReadAdvance<"ReadVBREV8", 0>; +defm "" : LMULReadAdvance<"ReadVREV8", 0>; +defm "" : LMULReadAdvance<"ReadVROL", 0>; +defm "" : LMULReadAdvance<"ReadVROR", 0>; +defm "" : LMULReadAdvance<"ReadVGHSH", 0>; +defm "" : LMULReadAdvance<"ReadVGMUL", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2CH", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2CL", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2MS", 0>; +defm "" : LMULReadAdvance<"ReadVAESMV", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF1", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF2", 0>; +defm "" : LMULReadAdvance<"ReadVAESZ", 0>; +defm "" : LMULReadAdvance<"ReadVSM4K", 0>; +defm "" : LMULReadAdvance<"ReadVSM4R", 0>; +defm "" : LMULReadAdvance<"ReadVSM3C", 0>; +defm "" : LMULReadAdvance<"ReadVSM3ME", 0>; + //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedZabha; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index 0885e325f24e68..31112d140cde7a 100644 --- 
a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -213,4 +213,5 @@ defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index e0f1fab1d6b409..dcd1a938a91472 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -312,4 +312,5 @@ defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; defm : UnsupportedSchedZabha; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 0086557a41fe7c..d9a2e38c0e9d73 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -297,3 +297,4 @@ def : ReadAdvance; include "RISCVScheduleZb.td" include "RISCVScheduleV.td" include "RISCVScheduleXSf.td" +include "RISCVScheduleZvk.td" diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td new file mode 100644 index 00000000000000..3957bd34b875e7 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td @@ -0,0 +1,202 @@ +//===- RISCVScheduleZvk.td - Zvk Scheduling Definitions ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// Define scheduler resources associated with def operands. 
+ +/// Zvbb extension +defm "" : LMULSchedWrites<"WriteVBREV">; +defm "" : LMULSchedWrites<"WriteVCLZ">; +defm "" : LMULSchedWrites<"WriteVCPOP">; +defm "" : LMULSchedWrites<"WriteVCTZ">; +defm "" : LMULSchedWrites<"WriteVWSLL">; + +/// Zvbc extension +defm "" : LMULSchedWrites<"WriteVCLMUL">; +defm "" : LMULSchedWrites<"WriteVCLMULH">; + +/// Zvkb extension +defm "" : LMULSchedWrites<"WriteVANDN">; +defm "" : LMULSchedWrites<"WriteVBREV8">; +defm "" : LMULSchedWrites<"WriteVREV8">; +defm "" : LMULSchedWrites<"WriteVROL">; +defm "" : LMULSchedWrites<"WriteVROR">; + +/// Zvkg extension +defm "" : LMULSchedWrites<"WriteVGHSH">; +defm "" : LMULSchedWrites<"WriteVGMUL">; + +/// Zvknha or Zvknhb extensions +defm "" : LMULSchedWrites<"WriteVSHA2CH">; +defm "" : LMULSchedWrites<"WriteVSHA2CL">; +defm "" : LMULSchedWrites<"WriteVSHA2MS">; + +/// Zvkned extension +defm "" : LMULSchedWrites<"WriteVAESMV">; +defm "" : LMULSchedWrites<"WriteVAESKF1">; +defm "" : LMULSchedWrites<"WriteVAESKF2">; +defm "" : LMULSchedWrites<"WriteVAESZ">; + +/// Zvksed extension +defm "" : LMULSchedWrites<"WriteVSM4K">; +defm "" : LMULSchedWrites<"WriteVSM4R">; + +/// Zvksh extension +defm "" : LMULSchedWrites<"WriteVSM3C">; +defm "" : LMULSchedWrites<"WriteVSM3ME">; + +/// Define scheduler resources associated with use operands. 
+/// Zvbb extension +defm "" : LMULSchedReads<"ReadVBREV">; +defm "" : LMULSchedReads<"ReadVCLZ">; +defm "" : LMULSchedReads<"ReadVCPOP">; +defm "" : LMULSchedReads<"ReadVCTZ">; +defm "" : LMULSchedReads<"ReadVWSLL">; + +/// Zvbc extension +defm "" : LMULSchedReads<"ReadVCLMUL">; +defm "" : LMULSchedReads<"ReadVCLMULH">; + +/// Zvkb extension +defm "" : LMULSchedReads<"ReadVANDN">; +defm "" : LMULSchedReads<"ReadVBREV8">; +defm "" : LMULSchedReads<"ReadVREV8">; +defm "" : LMULSchedReads<"ReadVROL">; +defm "" : LMULSchedReads<"ReadVROR">; + +/// Zvkg extension +defm "" : LMULSchedReads<"ReadVGHSH">; +defm "" : LMULSchedReads<"ReadVGMUL">; + +/// Zvknha or Zvknhb extensions +defm "" : LMULSchedReads<"ReadVSHA2CH">; +defm "" : LMULSchedReads<"ReadVSHA2CL">; +defm "" : LMULSchedReads<"ReadVSHA2MS">; + +/// Zvkned extension +defm "" : LMULSchedReads<"ReadVAESMV">; +defm "" : LMULSchedReads<"ReadVAESKF1">; +defm "" : LMULSchedReads<"ReadVAESKF2">; +defm "" : LMULSchedReads<"ReadVAESZ">; + +/// Zvksed extension +defm "" : LMULSchedReads<"ReadVSM4K">; +defm "" : LMULSchedReads<"ReadVSM4R">; + +/// Zvksh extension +defm "" : LMULSchedReads<"ReadVSM3C">; +defm "" : LMULSchedReads<"ReadVSM3ME">; + +multiclass UnsupportedSchedZvbb { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVBREV", []>; +defm "" : LMULWriteRes<"WriteVCLZ", []>; +defm "" : LMULWriteRes<"WriteVCPOP", []>; +defm "" : LMULWriteRes<"WriteVCTZ", []>; +defm "" : LMULWriteRes<"WriteVWSLL", []>; + +defm "" : LMULReadAdvance<"ReadVBREV", 0>; +defm "" : LMULReadAdvance<"ReadVCLZ", 0>; +defm "" : LMULReadAdvance<"ReadVCPOP", 0>; +defm "" : LMULReadAdvance<"ReadVCTZ", 0>; +defm "" : LMULReadAdvance<"ReadVWSLL", 0>; +} +} + +multiclass UnsupportedSchedZvbc { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVCLMUL", []>; +defm "" : LMULWriteRes<"WriteVCLMULH", []>; + +defm "" : LMULReadAdvance<"ReadVCLMUL", 0>; +defm "" : LMULReadAdvance<"ReadVCLMULH", 0>; +} +} + +multiclass UnsupportedSchedZvkb { 
+let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVANDN", []>; +defm "" : LMULWriteRes<"WriteVBREV8", []>; +defm "" : LMULWriteRes<"WriteVREV8", []>; +defm "" : LMULWriteRes<"WriteVROL", []>; +defm "" : LMULWriteRes<"WriteVROR", []>; + +defm "" : LMULReadAdvance<"ReadVANDN", 0>; +defm "" : LMULReadAdvance<"ReadVBREV8", 0>; +defm "" : LMULReadAdvance<"ReadVREV8", 0>; +defm "" : LMULReadAdvance<"ReadVROL", 0>; +defm "" : LMULReadAdvance<"ReadVROR", 0>; +} +} + +multiclass UnsupportedSchedZvkg { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVGHSH", []>; +defm "" : LMULWriteRes<"WriteVGMUL", []>; + +defm "" : LMULReadAdvance<"ReadVGHSH", 0>; +defm "" : LMULReadAdvance<"ReadVGMUL", 0>; +} +} + +multiclass UnsupportedSchedZvknhaOrZvknhb { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSHA2CH", []>; +defm "" : LMULWriteRes<"WriteVSHA2CL", []>; +defm "" : LMULWriteRes<"WriteVSHA2MS", []>; + +defm "" : LMULReadAdvance<"ReadVSHA2CH", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2CL", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2MS", 0>; +} +} + +multiclass UnsupportedSchedZvkned { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVAESMV", []>; +defm "" : LMULWriteRes<"WriteVAESKF1", []>; +defm "" : LMULWriteRes<"WriteVAESKF2", []>; +defm "" : LMULWriteRes<"WriteVAESZ", []>; + +defm "" : LMULReadAdvance<"ReadVAESMV", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF1", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF2", 0>; +defm "" : LMULReadAdvance<"ReadVAESZ", 0>; +} +} + +multiclass UnsupportedSchedZvksed { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSM4K", []>; +defm "" : LMULWriteRes<"WriteVSM4R", []>; + +defm "" : LMULReadAdvance<"ReadVSM4K", 0>; +defm "" : LMULReadAdvance<"ReadVSM4R", 0>; +} +} + +multiclass UnsupportedSchedZvksh { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSM3C", []>; +defm "" : LMULWriteRes<"WriteVSM3ME", []>; + +defm "" : LMULReadAdvance<"ReadVSM3C", 0>; +defm "" : LMULReadAdvance<"ReadVSM3ME", 
0>; +} +} + +// Helper class to define all RISC-V Vector Crypto extensions as unsupported +multiclass UnsupportedSchedZvk { +defm "" : UnsupportedSchedZvbb; +defm "" : UnsupportedSchedZvbc; +defm "" : UnsupportedSchedZvkb; +defm "" : UnsupportedSchedZvkg; +defm "" : UnsupportedSchedZvknhaOrZvknhb; +defm "" : UnsupportedSchedZvkned; +defm "" : UnsupportedSchedZvksed; +defm "" : UnsupportedSchedZvksh; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s index 4207477d0e7ae2..ffdc44cf8408a8 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s @@ -171,136 +171,136 @@ vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# 
CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 
1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 1 1.00 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 vror.vi v4, v8, 8 # 
CHECK-NEXT: 1 6 1.00 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 6 1.00 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 1.00 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 2.00 vwsll.vv v8, v4, v12 # CHECK-NEXT: 1 6 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 6 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 1 4.00 vandn.vv v8, v16, v24 # CHECK-NEXT: 1 1 4.00 vandn.vx v8, v16, a0 -# CHECK-NEXT: 1 1 4.00 vbrev.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vbrev8.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vrev8.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vclz.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vctz.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vcpop.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vrol.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vrol.vx v8, v16, a0 +# CHECK-NEXT: 1 2 4.00 vbrev.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vbrev8.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vrev8.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vclz.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vctz.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vcpop.v v8, v16 +# CHECK-NEXT: 1 2 4.00 
vrol.vv v8, v16, v24 +# CHECK-NEXT: 1 2 4.00 vrol.vx v8, v16, a0 # CHECK-NEXT: 1 1 4.00 vror.vv v8, v16, v24 # CHECK-NEXT: 1 1 4.00 vror.vx v8, v16, a0 # CHECK-NEXT: 1 1 4.00 vror.vi v8, v16, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu # CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0 # CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12 # CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0 # CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8 # CHECK-NEXT: 1 6 2.00 vwsll.vv v8, v4, v12 # CHECK-NEXT: 1 6 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 6 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s index 291befcd8ba442..faf75234ff3b78 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s @@ -29,12 +29,12 @@ vclmulh.vx v8, v12, a0 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 20 -# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total Cycles: 28 # CHECK-NEXT: Total uOps: 20 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.74 -# CHECK-NEXT: IPC: 0.74 +# CHECK-NEXT: uOps Per Cycle: 0.71 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 30.0 # CHECK: Instruction 
Info: @@ -47,25 +47,25 @@ vclmulh.vx v8, v12, a0 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vclmul.vv v8, v12, v24 -# CHECK-NEXT: 1 1 4.00 vclmul.vx v8, v12, a0 -# CHECK-NEXT: 1 1 4.00 vclmulh.vv v8, v12, v24 -# CHECK-NEXT: 1 1 4.00 vclmulh.vx v8, v12, a0 +# CHECK-NEXT: 1 2 4.00 vclmul.vv v8, v12, v24 +# CHECK-NEXT: 1 2 4.00 vclmul.vx v8, v12, a0 +# CHECK-NEXT: 1 2 4.00 vclmulh.vv v8, v12, v24 +# CHECK-NEXT: 1 2 4.00 vclmulh.vx v8, v12, a0 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s index 
9a64ac92769460..2e82aa6f40d572 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s @@ -36,12 +36,12 @@ vgmul.vv v4, v8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 24 -# CHECK-NEXT: Total Cycles: 38 +# CHECK-NEXT: Total Cycles: 39 # CHECK-NEXT: Total uOps: 24 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.63 -# CHECK-NEXT: IPC: 0.63 +# CHECK-NEXT: uOps Per Cycle: 0.62 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 36.0 # CHECK: Instruction Info: @@ -54,11 +54,11 @@ vgmul.vv v4, v8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 1 0.50 vghsh.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vgmul.vv v4, v8 @@ -98,7 +98,7 @@ vgmul.vv v4, v8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 35.00 37.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 34.00 38.00 - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: @@ -110,7 +110,7 @@ vgmul.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8 +# CHECK-NEXT: - 
- - - - - - - - - - - 1.00 - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8