diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index f222ba9c20a26..d091077f729b8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -24,9 +24,9 @@ def tuimm5 : RISCVOp, TImmLeaf(Imm);}]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { multiclass VCLMUL_MV_V_X funct6> { def V : VALUVV, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVCLMULV", "ReadVCLMULV", "ReadVCLMULV">; def X : VALUVX, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVCLMULX", "ReadVCLMULV", "ReadVCLMULX">; } class RVInstIVI_VROR funct6, dag outs, dag ins, string opcodestr, @@ -55,7 +55,7 @@ multiclass VROR_IV_V_X_I funct6> def I : RVInstIVI_VROR, - SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; + SchedUnaryMC<"WriteVRotI", "ReadVRotV">; } // op vd, vs2, vs1 @@ -107,10 +107,10 @@ multiclass VAES_MV_V_S funct6_vv, bits<6> funct6_vs, bits<5> vs1, RISCVVFormat opv, string opcodestr> { let RVVConstraint = NoConstraint in def NAME # _VV : PALUVs2NoVmBinary, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESMVV", "ReadVAESMVV", "ReadVAESMVV">; let RVVConstraint = VS2Constraint in def NAME # _VS : PALUVs2NoVmBinary, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESMVV", "ReadVAESMVV", "ReadVAESMVV">; } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -142,22 +142,22 @@ let Predicates = [HasStdExtZvkb] in { let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in { def VGHSH_VV : PALUVVNoVmTernary<0b101100, OPMVV, "vghsh.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVGHSHV", "ReadVGHSHV", "ReadVGHSHV", + "ReadVGHSHV">; def VGMUL_VV : PALUVs2NoVmBinary<0b101000, 0b10001, OPMVV, "vgmul.vv">, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVGMULV", 
"ReadVGMULV", "ReadVGMULV">; } // Predicates = [HasStdExtZvkg] let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = Sha2Constraint in { def VSHA2CH_VV : PALUVVNoVmTernary<0b101110, OPMVV, "vsha2ch.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2CHV", "ReadVSHA2CHV", "ReadVSHA2CHV", + "ReadVSHA2CHV">; def VSHA2CL_VV : PALUVVNoVmTernary<0b101111, OPMVV, "vsha2cl.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2CLV", "ReadVSHA2CLV", "ReadVSHA2CLV", + "ReadVSHA2CLV">; def VSHA2MS_VV : PALUVVNoVmTernary<0b101101, OPMVV, "vsha2ms.vv">, - SchedTernaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", - "ReadVIALUV">; + SchedTernaryMC<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV", + "ReadVSHA2MSV">; } // Predicates = [HasStdExtZvknhaOrZvknhb] let Predicates = [HasStdExtZvkned] in { @@ -166,26 +166,26 @@ let Predicates = [HasStdExtZvkned] in { defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">; defm VAESEM : VAES_MV_V_S<0b101000, 0b101001, 0b00010, OPMVV, "vaesem">; def VAESKF1_VI : PALUVINoVm<0b100010, "vaeskf1.vi", uimm5>, - SchedUnaryMC<"WriteVIALUV", "ReadVIALUV">; + SchedUnaryMC<"WriteVAESKF1V", "ReadVAESKF1V">; def VAESKF2_VI : PALUVINoVmBinary<0b101010, "vaeskf2.vi", uimm5>, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESKF2V", "ReadVAESKF2V", "ReadVAESKF2V">; let RVVConstraint = VS2Constraint in def VAESZ_VS : PALUVs2NoVmBinary<0b101001, 0b00111, OPMVV, "vaesz.vs">, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVAESZV", "ReadVAESZV", "ReadVAESZV">; } // Predicates = [HasStdExtZvkned] let Predicates = [HasStdExtZvksed] in { let RVVConstraint = NoConstraint in def VSM4K_VI : PALUVINoVm<0b100001, "vsm4k.vi", uimm5>, - SchedUnaryMC<"WriteVIALUV", "ReadVIALUV">; + SchedUnaryMC<"WriteVSM4KV", "ReadVSM4KV">; defm VSM4R : VAES_MV_V_S<0b101000, 0b101001, 
0b10000, OPMVV, "vsm4r">; } // Predicates = [HasStdExtZvksed] let Predicates = [HasStdExtZvksh], RVVConstraint = VS2Constraint in { def VSM3C_VI : PALUVINoVmBinary<0b101011, "vsm3c.vi", uimm5>, - SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; + SchedBinaryMC<"WriteVSM3CV", "ReadVSM3CV", "ReadVSM3CV">; def VSM3ME_VV : PALUVVNoVm<0b100000, OPMVV, "vsm3me.vv">, - SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; + SchedUnaryMC<"WriteVSM3MEV", "ReadVSM3MEV">; } // Predicates = [HasStdExtZvksh] //===----------------------------------------------------------------------===// @@ -280,55 +280,121 @@ multiclass VPseudoBinaryV_S_NoMask_Zvk { def "_VS_" # m.MX # "_" # vs2_lmul.MX : VPseudoBinaryNoMask_Zvk; } -multiclass VPseudoVALU_V_NoMask_Zvk { +multiclass VPseudoVGMUL { foreach m = MxListVF4 in { defvar mx = m.MX; defm "" : VPseudoBinaryV_V_NoMask_Zvk, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx>; + SchedBinary<"WriteVGMULV", "ReadVGMULV", "ReadVGMULV", mx>; } } -multiclass VPseudoVALU_S_NoMask_Zvk { +multiclass VPseudoVAESMV { foreach m = MxListVF4 in { defvar mx = m.MX; + defm "" : VPseudoBinaryV_V_NoMask_Zvk, + SchedBinary<"WriteVAESMVV", "ReadVAESMVV", "ReadVAESMVV", mx>; defm "" : VPseudoBinaryV_S_NoMask_Zvk, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx>; + SchedBinary<"WriteVAESMVV", "ReadVAESMVV", "ReadVAESMVV", mx>; + + } +} + +multiclass VPseudoVSM4R { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_V_NoMask_Zvk, + SchedBinary<"WriteVSM4RV", "ReadVSM4RV", "ReadVSM4RV", mx>; + defm "" : VPseudoBinaryV_S_NoMask_Zvk, + SchedBinary<"WriteVSM4RV", "ReadVSM4RV", "ReadVSM4RV", mx>; + + } +} + +multiclass VPseudoVGHSH { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm _VV : VPseudoTernaryNoMask_Zvk, + SchedTernary<"WriteVGHSHV", "ReadVGHSHV", "ReadVGHSHV", + "ReadVGHSHV", mx>; } } -multiclass VPseudoVALU_V_S_NoMask_Zvk - : VPseudoVALU_V_NoMask_Zvk, VPseudoVALU_S_NoMask_Zvk; +multiclass VPseudoVSHA2CH { 
+ foreach m = MxListVF4 in { + defvar mx = m.MX; + defm _VV : VPseudoTernaryNoMask_Zvk, + SchedTernary<"WriteVSHA2CHV", "ReadVSHA2CHV", "ReadVSHA2CHV", + "ReadVSHA2CHV", mx>; + } +} + +multiclass VPseudoVSHA2CL { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm _VV : VPseudoTernaryNoMask_Zvk, + SchedTernary<"WriteVSHA2CLV", "ReadVSHA2CLV", "ReadVSHA2CLV", + "ReadVSHA2CLV", mx>; + } +} -multiclass VPseudoVALU_VV_NoMask_Zvk { +multiclass VPseudoVSHA2MS { foreach m = MxListVF4 in { defvar mx = m.MX; defm _VV : VPseudoTernaryNoMask_Zvk, - SchedTernary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", "ReadVIALUV", mx>; + SchedTernary<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV", + "ReadVSHA2MSV", mx>; + } +} + +multiclass VPseudoVAESKF1 { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm _VI : VPseudoBinaryNoMaskTU_Zvk, + SchedBinary<"WriteVAESKF1V", "ReadVAESKF1V", "ReadVAESKF1V", mx, + forceMergeOpRead=true>; + } +} + +multiclass VPseudoVAESKF2 { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm _VI : VPseudoTernaryNoMask_Zvk, + SchedTernary<"WriteVAESKF2V", "ReadVAESKF2V", "ReadVAESKF2V", + "ReadVAESKF2V", mx>; + } +} + +multiclass VPseudoVAESZ { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_S_NoMask_Zvk, + SchedBinary<"WriteVAESZV", "ReadVAESZV", "ReadVAESZV", mx>; } } -multiclass VPseudoVALU_VI_NoMask_Zvk { +multiclass VPseudoVSM3C { foreach m = MxListVF4 in { defvar mx = m.MX; defm _VI : VPseudoTernaryNoMask_Zvk, - SchedTernary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", "ReadVIALUV", mx>; + SchedTernary<"WriteVSM3CV", "ReadVSM3CV", "ReadVSM3CV", + "ReadVSM3CV", mx>; } } -multiclass VPseudoVALU_VI_NoMaskTU_Zvk { +multiclass VPseudoVSM4K { foreach m = MxListVF4 in { defvar mx = m.MX; defm _VI : VPseudoBinaryNoMaskTU_Zvk, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + SchedBinary<"WriteVSM4KV", "ReadVSM4KV", "ReadVSM4KV", mx, forceMergeOpRead=true>; } } -multiclass VPseudoVALU_VV_NoMaskTU_Zvk { +multiclass 
VPseudoVSM3ME { foreach m = MxListVF4 in { defvar mx = m.MX; defm _VV : VPseudoBinaryNoMaskTU_Zvk, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + SchedBinary<"WriteVSM3MEV", "ReadVSM3MEV", "ReadVSM3MEV", mx, forceMergeOpRead=true>; } } @@ -337,10 +403,10 @@ multiclass VPseudoVCLMUL_VV_VX { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VV, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + SchedBinary<"WriteVCLMULV", "ReadVCLMULV", "ReadVCLMULV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX, - SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx, + SchedBinary<"WriteVCLMULX", "ReadVCLMULV", "ReadVCLMULX", mx, forceMergeOpRead=true>; } } @@ -354,28 +420,111 @@ multiclass VPseudoUnaryV_V { } } -multiclass VPseudoVALU_V { +multiclass VPseudoVBREV { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoUnaryV_V, - SchedUnary<"WriteVIALUV", "ReadVIALUV", mx, - forceMergeOpRead=true>; + SchedUnary<"WriteVBREVV", "ReadVBREVV", mx, forceMergeOpRead=true>; } } -multiclass VPseudoVWALU_VV_VX_VI : VPseudoVWALU_VV_VX { +multiclass VPseudoVCLZ { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCLZV", "ReadVCLZV", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVCTZ { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCTZV", "ReadVCTZV", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVCPOP { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVCPOPV", "ReadVCPOPV", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVWALU_VV_VX_VI { foreach m = MxListW in { + defvar mx = m.MX; + defm "" : VPseudoBinaryW_VV, + SchedBinary<"WriteVWSLLV", "ReadVWSLLV", "ReadVWSLLV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryW_VX, + SchedBinary<"WriteVWSLLX", "ReadVWSLLV", "ReadVWSLLX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VI, - 
SchedUnary<"WriteVIWALUV", "ReadVIWALUV", m.MX, + SchedUnary<"WriteVWSLLI", "ReadVWSLLV", mx, forceMergeOpRead=true>; } } +multiclass VPseudoVANDN { + foreach m = MxList in { + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX, + forceMergeOpRead=true>; + } +} + +multiclass VPseudoVBREV8 { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVBREV8V", "ReadVBREV8V", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVREV8 { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoUnaryV_V, + SchedUnary<"WriteVREV8V", "ReadVREV8V", mx, forceMergeOpRead=true>; + } +} + +multiclass VPseudoVROL { + foreach m = MxList in { + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVRotV", "ReadVRotV", "ReadVRotV", m.MX, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVRotX", "ReadVRotV", "ReadVRotX", m.MX, + forceMergeOpRead=true>; + } +} + +multiclass VPseudoVROR { + defvar Constraint = ""; + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVRotV", "ReadVRotV", "ReadVRotV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VX, + SchedBinary<"WriteVRotX", "ReadVRotV", "ReadVRotX", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VI, + SchedUnary<"WriteVRotI", "ReadVRotV", mx, forceMergeOpRead=true>; + } +} + let Predicates = [HasStdExtZvbb] in { - defm PseudoVBREV : VPseudoVALU_V; - defm PseudoVCLZ : VPseudoVALU_V; - defm PseudoVCTZ : VPseudoVALU_V; - defm PseudoVCPOP : VPseudoVALU_V; + defm PseudoVBREV : VPseudoVBREV; + defm PseudoVCLZ : VPseudoVCLZ; + defm PseudoVCTZ : VPseudoVCTZ; + defm PseudoVCPOP : VPseudoVCPOP; defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI; } // Predicates = [HasStdExtZvbb] @@ -385,42 +534,42 @@ let Predicates = [HasStdExtZvbc] in { } // Predicates = 
[HasStdExtZvbc] let Predicates = [HasStdExtZvkb] in { - defm PseudoVANDN : VPseudoVALU_VV_VX; - defm PseudoVBREV8 : VPseudoVALU_V; - defm PseudoVREV8 : VPseudoVALU_V; - defm PseudoVROL : VPseudoVALU_VV_VX; - defm PseudoVROR : VPseudoVALU_VV_VX_VI; + defm PseudoVANDN : VPseudoVANDN; + defm PseudoVBREV8 : VPseudoVBREV8; + defm PseudoVREV8 : VPseudoVREV8; + defm PseudoVROL : VPseudoVROL; + defm PseudoVROR : VPseudoVROR; } // Predicates = [HasStdExtZvkb] let Predicates = [HasStdExtZvkg] in { - defm PseudoVGHSH : VPseudoVALU_VV_NoMask_Zvk; - defm PseudoVGMUL : VPseudoVALU_V_NoMask_Zvk; + defm PseudoVGHSH : VPseudoVGHSH; + defm PseudoVGMUL : VPseudoVGMUL; } // Predicates = [HasStdExtZvkg] let Predicates = [HasStdExtZvkned] in { - defm PseudoVAESDF : VPseudoVALU_V_S_NoMask_Zvk; - defm PseudoVAESDM : VPseudoVALU_V_S_NoMask_Zvk; - defm PseudoVAESEF : VPseudoVALU_V_S_NoMask_Zvk; - defm PseudoVAESEM : VPseudoVALU_V_S_NoMask_Zvk; - defm PseudoVAESKF1 : VPseudoVALU_VI_NoMaskTU_Zvk; - defm PseudoVAESKF2 : VPseudoVALU_VI_NoMask_Zvk; - defm PseudoVAESZ : VPseudoVALU_S_NoMask_Zvk; + defm PseudoVAESDF : VPseudoVAESMV; + defm PseudoVAESDM : VPseudoVAESMV; + defm PseudoVAESEF : VPseudoVAESMV; + defm PseudoVAESEM : VPseudoVAESMV; + defm PseudoVAESKF1 : VPseudoVAESKF1; + defm PseudoVAESKF2 : VPseudoVAESKF2; + defm PseudoVAESZ : VPseudoVAESZ; } // Predicates = [HasStdExtZvkned] let Predicates = [HasStdExtZvknhaOrZvknhb] in { - defm PseudoVSHA2CH : VPseudoVALU_VV_NoMask_Zvk; - defm PseudoVSHA2CL : VPseudoVALU_VV_NoMask_Zvk; - defm PseudoVSHA2MS : VPseudoVALU_VV_NoMask_Zvk; + defm PseudoVSHA2CH : VPseudoVSHA2CH; + defm PseudoVSHA2CL : VPseudoVSHA2CL; + defm PseudoVSHA2MS : VPseudoVSHA2MS; } // Predicates = [HasStdExtZvknhaOrZvknhb] let Predicates = [HasStdExtZvksed] in { - defm PseudoVSM4K : VPseudoVALU_VI_NoMaskTU_Zvk; - defm PseudoVSM4R : VPseudoVALU_V_S_NoMask_Zvk; + defm PseudoVSM4K : VPseudoVSM4K; + defm PseudoVSM4R : VPseudoVSM4R; } // Predicates = [HasStdExtZvksed] let Predicates = 
[HasStdExtZvksh] in { - defm PseudoVSM3C : VPseudoVALU_VI_NoMask_Zvk; - defm PseudoVSM3ME : VPseudoVALU_VV_NoMaskTU_Zvk; + defm PseudoVSM3C : VPseudoVSM3C; + defm PseudoVSM3ME : VPseudoVSM3ME; } // Predicates = [HasStdExtZvksh] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 65494e73758d6..9ddc4281092dd 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -262,4 +262,5 @@ defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index a532066b3a1c8..e67da839bdb87 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1298,4 +1298,5 @@ defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index fccdd7e4f3ec2..a37958826e028 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -367,4 +367,5 @@ defm : UnsupportedSchedSFB; defm : UnsupportedSchedZfa; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 6e4fb19361f55..6ba299385f07e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -748,6 +748,62 @@ foreach mx = SchedMxList in { } } +// Vector Crypto +foreach mx = SchedMxList in { + defvar LMulLat = SiFiveP600GetLMulCycles.c; + defvar IsWorstCase = SiFiveP600IsWorstCaseMX.c; + // Zvbb 
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvbc + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkb + // VANDN uses WriteVIALU[V|X|I] + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkg + let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // ZvknhaOrZvknhb + let Latency = 3, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + // Zvkned + 
let Latency = 2, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + let Latency = 1, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorArith], mx, IsWorstCase>; + // Zvksed and Zvksh + let Latency = 3, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + } +} + // Others def : WriteRes; def : WriteRes; @@ -1032,6 +1088,42 @@ foreach mx = SchedMxList in { def : ReadAdvance("ReadVMergeOp_" # mx # "_E" # sew), 0>; } +// Vector Crypto Extensions +// Zvbb +defm "" : LMULReadAdvance<"ReadVBREVV", 0>; +defm "" : LMULReadAdvance<"ReadVCLZV", 0>; +defm "" : LMULReadAdvance<"ReadVCPOPV", 0>; +defm "" : LMULReadAdvance<"ReadVCTZV", 0>; +defm "" : LMULReadAdvance<"ReadVWSLLV", 0>; +defm "" : LMULReadAdvance<"ReadVWSLLX", 0>; +// Zvbc +defm "" : LMULReadAdvance<"ReadVCLMULV", 0>; +defm "" : LMULReadAdvance<"ReadVCLMULX", 0>; +// Zvkb +// VANDN uses ReadVIALU[V|X|I] +defm "" : LMULReadAdvance<"ReadVBREV8V", 0>; +defm "" : LMULReadAdvance<"ReadVREV8V", 0>; +defm "" : LMULReadAdvance<"ReadVRotV", 0>; +defm "" : LMULReadAdvance<"ReadVRotX", 0>; +// Zvkg +defm "" : LMULReadAdvance<"ReadVGHSHV", 0>; +defm "" : LMULReadAdvance<"ReadVGMULV", 0>; +// Zvknha or Zvknhb +defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>; +// Zvkned +defm "" : LMULReadAdvance<"ReadVAESMVV", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>; +defm "" : 
LMULReadAdvance<"ReadVAESKF2V", 0>; +defm "" : LMULReadAdvance<"ReadVAESZV", 0>; +// Zvksed +defm "" : LMULReadAdvance<"ReadVSM4KV", 0>; +defm "" : LMULReadAdvance<"ReadVSM4RV", 0>; +// Zvksh +defm "" : LMULReadAdvance<"ReadVSM3CV", 0>; +defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>; + //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedZabha; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index 0885e325f24e6..31112d140cde7 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -213,4 +213,5 @@ defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index e0f1fab1d6b40..dcd1a938a9147 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -312,4 +312,5 @@ defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; defm : UnsupportedSchedZabha; defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 0086557a41fe7..d9a2e38c0e9d7 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -297,3 +297,4 @@ def : ReadAdvance; include "RISCVScheduleZb.td" include "RISCVScheduleV.td" include "RISCVScheduleXSf.td" +include "RISCVScheduleZvk.td" diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td new file mode 100644 index 0000000000000..640c456322f02 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td @@ -0,0 +1,208 @@ +//=== RISCVScheduleZvk.td - RISC-V Scheduling Definitions Zvk -*- tablegen ===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// Define scheduler resources associated with def operands. + +/// Zvbb extension +defm "" : LMULSchedWrites<"WriteVBREVV">; +defm "" : LMULSchedWrites<"WriteVCLZV">; +defm "" : LMULSchedWrites<"WriteVCPOPV">; +defm "" : LMULSchedWrites<"WriteVCTZV">; +defm "" : LMULSchedWrites<"WriteVWSLLV">; +defm "" : LMULSchedWrites<"WriteVWSLLX">; +defm "" : LMULSchedWrites<"WriteVWSLLI">; + +/// Zvbc extension +defm "" : LMULSchedWrites<"WriteVCLMULV">; +defm "" : LMULSchedWrites<"WriteVCLMULX">; + +/// Zvkb extension +// VANDN uses WriteVIALU[V|X|I] +defm "" : LMULSchedWrites<"WriteVBREV8V">; +defm "" : LMULSchedWrites<"WriteVREV8V">; +defm "" : LMULSchedWrites<"WriteVRotV">; +defm "" : LMULSchedWrites<"WriteVRotX">; +defm "" : LMULSchedWrites<"WriteVRotI">; + +/// Zvkg extension +defm "" : LMULSchedWrites<"WriteVGHSHV">; +defm "" : LMULSchedWrites<"WriteVGMULV">; + +/// Zvknha or Zvknhb extensions +defm "" : LMULSchedWrites<"WriteVSHA2CHV">; +defm "" : LMULSchedWrites<"WriteVSHA2CLV">; +defm "" : LMULSchedWrites<"WriteVSHA2MSV">; + +/// Zvkned extension +defm "" : LMULSchedWrites<"WriteVAESMVV">; +defm "" : LMULSchedWrites<"WriteVAESKF1V">; +defm "" : LMULSchedWrites<"WriteVAESKF2V">; +defm "" : LMULSchedWrites<"WriteVAESZV">; + +/// Zvksed extension +defm "" : LMULSchedWrites<"WriteVSM4KV">; +defm "" : LMULSchedWrites<"WriteVSM4RV">; + +/// Zvksh extension +defm "" : LMULSchedWrites<"WriteVSM3CV">; +defm "" : LMULSchedWrites<"WriteVSM3MEV">; + +/// Define scheduler resources associated with use operands. 
+/// Zvbb extension +defm "" : LMULSchedReads<"ReadVBREVV">; +defm "" : LMULSchedReads<"ReadVCLZV">; +defm "" : LMULSchedReads<"ReadVCPOPV">; +defm "" : LMULSchedReads<"ReadVCTZV">; +defm "" : LMULSchedReads<"ReadVWSLLV">; +defm "" : LMULSchedReads<"ReadVWSLLX">; + +/// Zvbc extension +defm "" : LMULSchedReads<"ReadVCLMULV">; +defm "" : LMULSchedReads<"ReadVCLMULX">; + +/// Zvkb extension +// VANDN uses ReadVIALU[V|X|I] +defm "" : LMULSchedReads<"ReadVBREV8V">; +defm "" : LMULSchedReads<"ReadVREV8V">; +defm "" : LMULSchedReads<"ReadVRotV">; +defm "" : LMULSchedReads<"ReadVRotX">; + +/// Zvkg extension +defm "" : LMULSchedReads<"ReadVGHSHV">; +defm "" : LMULSchedReads<"ReadVGMULV">; + +/// Zvknha or Zvknhb extensions +defm "" : LMULSchedReads<"ReadVSHA2CHV">; +defm "" : LMULSchedReads<"ReadVSHA2CLV">; +defm "" : LMULSchedReads<"ReadVSHA2MSV">; + +/// Zvkned extension +defm "" : LMULSchedReads<"ReadVAESMVV">; +defm "" : LMULSchedReads<"ReadVAESKF1V">; +defm "" : LMULSchedReads<"ReadVAESKF2V">; +defm "" : LMULSchedReads<"ReadVAESZV">; + +/// Zvksed extension +defm "" : LMULSchedReads<"ReadVSM4KV">; +defm "" : LMULSchedReads<"ReadVSM4RV">; + +/// Zvksh extension +defm "" : LMULSchedReads<"ReadVSM3CV">; +defm "" : LMULSchedReads<"ReadVSM3MEV">; + +multiclass UnsupportedSchedZvbb { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVBREVV", []>; +defm "" : LMULWriteRes<"WriteVCLZV", []>; +defm "" : LMULWriteRes<"WriteVCPOPV", []>; +defm "" : LMULWriteRes<"WriteVCTZV", []>; +defm "" : LMULWriteRes<"WriteVWSLLV", []>; +defm "" : LMULWriteRes<"WriteVWSLLX", []>; +defm "" : LMULWriteRes<"WriteVWSLLI", []>; + +defm "" : LMULReadAdvance<"ReadVBREVV", 0>; +defm "" : LMULReadAdvance<"ReadVCLZV", 0>; +defm "" : LMULReadAdvance<"ReadVCPOPV", 0>; +defm "" : LMULReadAdvance<"ReadVCTZV", 0>; +defm "" : LMULReadAdvance<"ReadVWSLLV", 0>; +defm "" : LMULReadAdvance<"ReadVWSLLX", 0>; +} +} + +multiclass UnsupportedSchedZvbc { +let Unsupported = true in { +defm "" : 
LMULWriteRes<"WriteVCLMULV", []>; +defm "" : LMULWriteRes<"WriteVCLMULX", []>; + +defm "" : LMULReadAdvance<"ReadVCLMULV", 0>; +defm "" : LMULReadAdvance<"ReadVCLMULX", 0>; +} +} + +multiclass UnsupportedSchedZvkb { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVBREV8V", []>; +defm "" : LMULWriteRes<"WriteVREV8V", []>; +defm "" : LMULWriteRes<"WriteVRotV", []>; +defm "" : LMULWriteRes<"WriteVRotX", []>; +defm "" : LMULWriteRes<"WriteVRotI", []>; + +defm "" : LMULReadAdvance<"ReadVBREV8V", 0>; +defm "" : LMULReadAdvance<"ReadVREV8V", 0>; +defm "" : LMULReadAdvance<"ReadVRotV", 0>; +defm "" : LMULReadAdvance<"ReadVRotX", 0>; +} +} + +multiclass UnsupportedSchedZvkg { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVGHSHV", []>; +defm "" : LMULWriteRes<"WriteVGMULV", []>; + +defm "" : LMULReadAdvance<"ReadVGHSHV", 0>; +defm "" : LMULReadAdvance<"ReadVGMULV", 0>; +} +} + +multiclass UnsupportedSchedZvknhaOrZvknhb { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSHA2CHV", []>; +defm "" : LMULWriteRes<"WriteVSHA2CLV", []>; +defm "" : LMULWriteRes<"WriteVSHA2MSV", []>; + +defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>; +defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>; +} +} + +multiclass UnsupportedSchedZvkned { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVAESMVV", []>; +defm "" : LMULWriteRes<"WriteVAESKF1V", []>; +defm "" : LMULWriteRes<"WriteVAESKF2V", []>; +defm "" : LMULWriteRes<"WriteVAESZV", []>; + +defm "" : LMULReadAdvance<"ReadVAESMVV", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>; +defm "" : LMULReadAdvance<"ReadVAESKF2V", 0>; +defm "" : LMULReadAdvance<"ReadVAESZV", 0>; +} +} + +multiclass UnsupportedSchedZvksed { +let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSM4KV", []>; +defm "" : LMULWriteRes<"WriteVSM4RV", []>; + +defm "" : LMULReadAdvance<"ReadVSM4KV", 0>; +defm "" : LMULReadAdvance<"ReadVSM4RV", 0>; +} +} + +multiclass UnsupportedSchedZvksh { 
+let Unsupported = true in { +defm "" : LMULWriteRes<"WriteVSM3CV", []>; +defm "" : LMULWriteRes<"WriteVSM3MEV", []>; + +defm "" : LMULReadAdvance<"ReadVSM3CV", 0>; +defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>; +} +} + +// Helper class to define all RISC-V Vector Crypto extensions as unsupported +multiclass UnsupportedSchedZvk { +defm "" : UnsupportedSchedZvbb; +defm "" : UnsupportedSchedZvbc; +defm "" : UnsupportedSchedZvkb; +defm "" : UnsupportedSchedZvkg; +defm "" : UnsupportedSchedZvknhaOrZvknhb; +defm "" : UnsupportedSchedZvkned; +defm "" : UnsupportedSchedZvksed; +defm "" : UnsupportedSchedZvksh; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s index 4207477d0e7ae..61915375dd28b 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s @@ -151,12 +151,12 @@ vwsll.vi v8, v4, 8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 133 -# CHECK-NEXT: Total Cycles: 170 +# CHECK-NEXT: Total Cycles: 166 # CHECK-NEXT: Total uOps: 133 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.78 -# CHECK-NEXT: IPC: 0.78 +# CHECK-NEXT: uOps Per Cycle: 0.80 +# CHECK-NEXT: IPC: 0.80 # CHECK-NEXT: Block RThroughput: 164.0 # CHECK: Instruction Info: @@ -171,136 +171,136 @@ vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 6 0.50 
vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, 
e32, mf2, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 6 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 0.50 vwsll.vi v4, v8, 8 +# 
CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 1 1.00 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 1.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 1.00 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 6 1.00 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 6 1.00 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 # 
CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 2.00 vwsll.vv v8, v4, v12 -# CHECK-NEXT: 1 6 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 6 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12 +# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0 +# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 1 4.00 vandn.vv v8, v16, v24 # CHECK-NEXT: 1 1 4.00 vandn.vx v8, v16, a0 -# CHECK-NEXT: 1 1 4.00 vbrev.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vbrev8.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vrev8.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vclz.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vctz.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vcpop.v v8, v16 -# CHECK-NEXT: 1 1 4.00 vrol.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vrol.vx v8, v16, a0 -# CHECK-NEXT: 1 1 4.00 vror.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vror.vx v8, v16, a0 -# CHECK-NEXT: 1 1 4.00 vror.vi v8, v16, 8 +# CHECK-NEXT: 1 2 4.00 vbrev.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vbrev8.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vrev8.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vclz.v v8, v16 +# CHECK-NEXT: 1 2 4.00 
vctz.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vcpop.v v8, v16 +# CHECK-NEXT: 1 2 4.00 vrol.vv v8, v16, v24 +# CHECK-NEXT: 1 2 4.00 vrol.vx v8, v16, a0 +# CHECK-NEXT: 1 2 4.00 vror.vv v8, v16, v24 +# CHECK-NEXT: 1 2 4.00 vror.vx v8, v16, a0 +# CHECK-NEXT: 1 2 4.00 vror.vi v8, v16, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu # CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 # CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vclz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vctz.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 1 2.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 6 2.00 vwsll.vv v8, v4, v12 -# CHECK-NEXT: 1 6 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 6 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12 +# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0 +# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s index 291befcd8ba44..faf75234ff3b7 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s @@ -29,12 +29,12 @@ vclmulh.vx v8, v12, a0 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 
20 -# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total Cycles: 28 # CHECK-NEXT: Total uOps: 20 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.74 -# CHECK-NEXT: IPC: 0.74 +# CHECK-NEXT: uOps Per Cycle: 0.71 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 30.0 # CHECK: Instruction Info: @@ -47,25 +47,25 @@ vclmulh.vx v8, v12, a0 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 0.50 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 0.50 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 1 2.00 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vclmul.vv v8, v12, v24 -# CHECK-NEXT: 1 1 4.00 vclmul.vx v8, v12, a0 -# CHECK-NEXT: 1 1 4.00 vclmulh.vv v8, v12, v24 -# CHECK-NEXT: 1 1 4.00 vclmulh.vx v8, v12, a0 +# CHECK-NEXT: 1 2 4.00 vclmul.vv v8, v12, v24 +# CHECK-NEXT: 1 
2 4.00 vclmul.vx v8, v12, a0 +# CHECK-NEXT: 1 2 4.00 vclmulh.vv v8, v12, v24 +# CHECK-NEXT: 1 2 4.00 vclmulh.vx v8, v12, a0 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s index 9a64ac9276946..d974a077ab5a9 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s @@ -36,12 +36,12 @@ vgmul.vv v4, v8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 24 -# CHECK-NEXT: Total Cycles: 38 +# CHECK-NEXT: Total Cycles: 45 # CHECK-NEXT: Total uOps: 24 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.63 -# CHECK-NEXT: IPC: 0.63 +# CHECK-NEXT: uOps Per Cycle: 0.53 +# CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 36.0 # CHECK: Instruction Info: @@ -54,29 +54,29 @@ vgmul.vv v4, v8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 0.50 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vgmul.vv v4, v8 +# 
CHECK-NEXT: 1 2 1.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vghsh.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vgmul.vv v8, v16 +# CHECK-NEXT: 1 2 4.00 vghsh.vv v8, v16, v24 +# CHECK-NEXT: 1 2 4.00 vgmul.vv v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -98,7 +98,7 @@ vgmul.vv v4, v8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 35.00 37.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 36.00 36.00 - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: @@ -112,17 +112,17 @@ vgmul.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - 
vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vghsh.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vgmul.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v8, v16, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vgmul.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s index beea3efeaed06..a5c226e34452d 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s @@ -60,12 +60,12 @@ vaesz.vs v8, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 50 -# CHECK-NEXT: Total Cycles: 72 +# CHECK-NEXT: Total Cycles: 73 # CHECK-NEXT: Total uOps: 50 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.69 -# CHECK-NEXT: IPC: 0.69 +# CHECK-NEXT: uOps Per Cycle: 0.68 +# CHECK-NEXT: IPC: 0.68 # CHECK-NEXT: Block RThroughput: 72.0 # CHECK: Instruction Info: @@ -78,54 +78,54 @@ vaesz.vs v8, v16 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 0.50 vaesef.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesef.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesem.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesem.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaeskf1.vi v4, v8, 8 -# 
CHECK-NEXT: 1 1 0.50 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8 # CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vaesef.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesef.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesem.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesem.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 1 0.50 vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: 1 1 0.50 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8 # CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vaesef.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 vaesef.vs v4, v8 -# CHECK-NEXT: 1 1 1.00 vaesem.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 vaesem.vs v4, v8 -# CHECK-NEXT: 1 1 1.00 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 1 1.00 vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: 1 1 1.00 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vaeskf2.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, 
zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vaesef.vv v4, v8 -# CHECK-NEXT: 1 1 2.00 vaesef.vs v4, v8 -# CHECK-NEXT: 1 1 2.00 vaesem.vv v4, v8 -# CHECK-NEXT: 1 1 2.00 vaesem.vs v4, v8 -# CHECK-NEXT: 1 1 2.00 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 1 2.00 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 1 2.00 vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: 1 1 2.00 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 2.00 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 2.00 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 2.00 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 2.00 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 2.00 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 2.00 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 2.00 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 2.00 vaeskf2.vi v4, v8, 8 # CHECK-NEXT: 1 1 2.00 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vaesef.vv v8, v16 -# CHECK-NEXT: 1 1 4.00 vaesef.vs v8, v16 -# CHECK-NEXT: 1 1 4.00 vaesem.vv v8, v16 -# CHECK-NEXT: 1 1 4.00 vaesem.vs v8, v16 -# CHECK-NEXT: 1 1 4.00 vaesdm.vv v8, v16 -# CHECK-NEXT: 1 1 4.00 vaesdm.vs v8, v16 -# CHECK-NEXT: 1 1 4.00 vaeskf1.vi v8, v16, 8 -# CHECK-NEXT: 1 1 4.00 vaeskf2.vi v8, v16, 8 +# CHECK-NEXT: 1 2 4.00 vaesef.vv v8, v16 +# CHECK-NEXT: 1 2 4.00 vaesef.vs v8, v16 +# CHECK-NEXT: 1 2 4.00 vaesem.vv v8, v16 +# CHECK-NEXT: 1 2 4.00 vaesem.vs v8, v16 +# CHECK-NEXT: 1 2 4.00 vaesdm.vv v8, v16 +# CHECK-NEXT: 1 2 4.00 vaesdm.vs v8, v16 +# CHECK-NEXT: 1 2 4.00 vaeskf1.vi v8, v16, 8 +# CHECK-NEXT: 1 2 4.00 vaeskf2.vi v8, v16, 8 # CHECK-NEXT: 1 1 4.00 vaesz.vs v8, v16 # CHECK: Resources: @@ -148,57 +148,57 @@ vaesz.vs v8, v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 5.00 - - - - - - 69.00 75.00 - - - +# CHECK-NEXT: - - - - 5.00 - - - - - - 71.00 73.00 - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - 
- - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf2.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - 
- - - - - - - - - - 2.00 - - - vaesef.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesef.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesef.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesem.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesem.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesem.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaeskf1.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesef.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesem.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesem.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesem.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf1.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - 
- - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesef.vv v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesef.vs v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesem.vv v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vv v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vs v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesdm.vv v8, v16 -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vs v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vv v8, v16 +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesdm.vs v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf1.vi v8, v16, 8 -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaeskf2.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf2.vi v8, v16, 8 # CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesz.vs v8, v16 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s index 0e26e5bacaf21..f1a2a1899f0cf 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s @@ -45,12 +45,12 @@ vsha2cl.vv v8, v16, v24 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 32 -# CHECK-NEXT: Total Cycles: 45 +# CHECK-NEXT: Total Cycles: 68 # CHECK-NEXT: Total uOps: 32 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.71 -# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: uOps Per Cycle: 0.47 +# CHECK-NEXT: IPC: 0.47 # CHECK-NEXT: Block RThroughput: 45.0 # CHECK: Instruction Info: @@ -63,37 +63,37 @@ vsha2cl.vv v8, v16, v24 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12 +# 
CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 
vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -115,39 +115,39 @@ vsha2cl.vv v8, v16, v24 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 46.00 44.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 44.00 46.00 - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsha2ch.vv 
v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ms.vv v8, v16, v24 # CHECK-NEXT: - - - - - - - - 
- - - - 8.00 - - - vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksed.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksed.s index 5cfd266bf14a7..26fc8702d75fc 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksed.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksed.s @@ -30,13 +30,13 @@ vsm4r.vs v8, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 20 -# CHECK-NEXT: Total Cycles: 24 +# CHECK-NEXT: Total Cycles: 47 # CHECK-NEXT: Total uOps: 20 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.83 -# CHECK-NEXT: IPC: 0.83 -# CHECK-NEXT: Block RThroughput: 24.0 +# CHECK-NEXT: uOps Per Cycle: 0.43 +# CHECK-NEXT: IPC: 0.43 +# CHECK-NEXT: Block RThroughput: 48.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -48,25 +48,25 @@ vsm4r.vs v8, v16 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 0.50 vsm4k.vi v4, v8, 8 -# CHECK-NEXT: 1 1 0.50 vsm4r.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vsm4r.vs v4, v8 +# CHECK-NEXT: 1 3 1.00 vsm4k.vi v4, v8, 8 +# CHECK-NEXT: 1 3 1.00 vsm4r.vv v4, v8 +# CHECK-NEXT: 1 3 1.00 vsm4r.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vsm4k.vi v4, v8, 8 -# CHECK-NEXT: 1 1 0.50 vsm4r.vv v4, v8 -# CHECK-NEXT: 1 1 0.50 vsm4r.vs v4, v8 +# CHECK-NEXT: 1 3 1.00 vsm4k.vi v4, v8, 8 +# CHECK-NEXT: 1 3 1.00 vsm4r.vv v4, v8 +# CHECK-NEXT: 1 3 1.00 vsm4r.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vsm4k.vi v4, v8, 8 -# CHECK-NEXT: 1 1 1.00 vsm4r.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 vsm4r.vs v4, v8 +# CHECK-NEXT: 1 3 2.00 vsm4k.vi v4, v8, 8 +# CHECK-NEXT: 1 3 2.00 vsm4r.vv v4, v8 +# CHECK-NEXT: 1 3 2.00 vsm4r.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# 
CHECK-NEXT: 1 1 2.00 vsm4k.vi v4, v8, 8 -# CHECK-NEXT: 1 1 2.00 vsm4r.vv v4, v8 -# CHECK-NEXT: 1 1 2.00 vsm4r.vs v4, v8 +# CHECK-NEXT: 1 3 4.00 vsm4k.vi v4, v8, 8 +# CHECK-NEXT: 1 3 4.00 vsm4r.vv v4, v8 +# CHECK-NEXT: 1 3 4.00 vsm4r.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vsm4k.vi v8, v16, 8 -# CHECK-NEXT: 1 1 4.00 vsm4r.vv v8, v16 -# CHECK-NEXT: 1 1 4.00 vsm4r.vs v8, v16 +# CHECK-NEXT: 1 3 8.00 vsm4k.vi v8, v16, 8 +# CHECK-NEXT: 1 3 8.00 vsm4r.vv v8, v16 +# CHECK-NEXT: 1 3 8.00 vsm4r.vs v8, v16 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -88,27 +88,27 @@ vsm4r.vs v8, v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 5.00 - - - - - - 21.00 27.00 - - - +# CHECK-NEXT: - - - - 5.00 - - - - - - 48.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsm4k.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4k.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4r.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4r.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsm4k.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsm4r.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4k.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4r.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm4r.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsm4k.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 
2.00 - - - - vsm4k.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm4r.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsm4r.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm4r.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm4k.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsm4r.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm4r.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm4r.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsm4k.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm4k.vi v8, v16, 8 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm4r.vv v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsm4r.vs v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm4r.vs v8, v16 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s index 670a5caeca98a..574bbb36c23f3 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s @@ -25,13 +25,13 @@ vsm3c.vi v8, v16, 8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 15 -# CHECK-NEXT: Total Cycles: 14 +# CHECK-NEXT: Total Cycles: 31 # CHECK-NEXT: Total uOps: 15 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.07 -# CHECK-NEXT: IPC: 1.07 -# CHECK-NEXT: Block RThroughput: 16.0 +# CHECK-NEXT: uOps Per Cycle: 0.48 +# CHECK-NEXT: IPC: 0.48 +# CHECK-NEXT: Block RThroughput: 32.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -43,20 +43,20 @@ vsm3c.vi v8, v16, 8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 0.50 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 3 
1.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 3 1.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 3 2.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 3 4.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 4.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vsm3me.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vsm3c.vi v8, v16, 8 +# CHECK-NEXT: 1 3 8.00 vsm3me.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsm3c.vi v8, v16, 8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -78,22 +78,22 @@ vsm3c.vi v8, v16, 8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 5.00 - - - - - - 16.00 16.00 - - - +# CHECK-NEXT: - - - - 5.00 - - - - - - 32.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsm3me.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3me.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsm3me.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - 
- 1.00 - - - - vsm3me.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsm3me.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3me.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsm3me.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3me.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsm3me.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3me.vv v8, v16, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3c.vi v8, v16, 8