diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td index da89e158f9839..5dcbd52e12ac0 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -8,19 +8,106 @@ //===----------------------------------------------------------------------===// +class AscalonIsWorstCaseMX MxList> { + defvar LLMUL = LargestLMUL.r; + bit c = !eq(mx, LLMUL); +} + +class AscalonIsWorstCaseMXSEW MxList, + bit isF = 0> { + defvar LLMUL = LargestLMUL.r; + defvar SSEW = SmallestSEW.r; + bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); +} + +/// Cycle counts that scale with LMUL with LMUL=1 having the same latency as +/// fractional LMULs +class AscalonGetCyclesLMUL { + int c = !cond( + !eq(mx, "M1") : base, + !eq(mx, "M2") : !mul(base, 2), + !eq(mx, "M4") : !mul(base, 4), + !eq(mx, "M8") : !mul(base, 8), + !eq(mx, "MF2") : base, + !eq(mx, "MF4") : base, + !eq(mx, "MF8") : base + ); +} + +/// Linear LMUL scaling starting from smallest fractional LMUL +class AscalonGetCyclesLMULFractional { + int c = !cond( + !eq(mx, "MF8") : base, + !eq(mx, "MF4") : !mul(base, 2), + !eq(mx, "MF2") : !mul(base, 4), + !eq(mx, "M1") : !mul(base, 8), + !eq(mx, "M2") : !mul(base, 16), + !eq(mx, "M4") : !mul(base, 32), + !eq(mx, "M8") : !mul(base, 64) + ); +} + +class AscalonGetCyclesDefault { + int c = AscalonGetCyclesLMUL.c; +} + +class AscalonGetCyclesNarrowing { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 8, + !eq(mx, "M4") : 16, + !eq(mx, "MF2") : 2, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + + +class AscalonGetCyclesDivOrSqrt { + int c = !cond( + !eq(sew, 8) : AscalonGetCyclesLMUL.c, + !eq(sew, 16) : AscalonGetCyclesLMUL.c, + !eq(sew, 32) : AscalonGetCyclesLMUL.c, + !eq(sew, 64) : AscalonGetCyclesLMUL.c + ); +} + +class AscalonGetCyclesVRGatherVV { + int c = !cond( + !eq(mx, "M1") : 2, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 12, + !eq(mx, "M8") : 48, + !eq(mx, "MF2") : 2, + !eq(mx, "MF4") : 2, + !eq(mx, "MF8") : 2 + ); +} + +class AscalonGetCyclesStridedSegmented { + int c = !cond( + !eq(sew, 8) : AscalonGetCyclesLMULFractional.c, + !eq(sew, 16) : AscalonGetCyclesLMULFractional.c, + !eq(sew, 32) : AscalonGetCyclesLMULFractional.c, + !eq(sew, 64) : AscalonGetCyclesLMULFractional.c + ); +} + +//===----------------------------------------------------------------------===// + def TTAscalonD8Model : SchedMachineModel { let IssueWidth = 8; // 8-way decode and dispatch let MicroOpBufferSize = 256; // 256 micro-op re-order buffer let LoadLatency = 4; // Optimistic load latency let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - let CompleteModel = 0; + let CompleteModel = false; // TODO: supported, but haven't added scheduling info yet. let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, - HasStdExtZkr, HasVInstructions, HasVInstructionsI64]; + HasStdExtZkr]; } let SchedModel = TTAscalonD8Model in { @@ -34,11 +121,17 @@ let BufferSize = 16 in { def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC def AscalonFXC : ProcResource<2>; // ALU, BR def AscalonFXD : ProcResource<2>; // ALU - def AscalonFP : ProcResource<2>; - // TODO: two vector units with vector scheduling model. + def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>; + // FP + def AscalonFPA : ProcResource<1>; // Pipe A also handles FP/VEC -> INT + def AscalonFPB : ProcResource<1>; + def AscalonFP : ProcResGroup<[AscalonFPA, AscalonFPB]>; + // Vector + def AscalonVA : ProcResource<1>; + def AscalonVB : ProcResource<1>; + def AscalonV : ProcResGroup<[AscalonVA, AscalonVB]>; } -def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>; //===----------------------------------------------------------------------===// @@ -316,10 +409,625 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +//===----------------------------------------------------------------------===// +// Vector +def : WriteRes; + +// Configuration-Setting Instructions +def : WriteRes; +def : WriteRes; +let Latency = 2 in { + def : WriteRes; +} + +// Vector Loads and Stores +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLDE", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDFF", [AscalonLS], mx, IsWorstCase>; + } + defm "" : LMULWriteResMX<"WriteVSTE", [AscalonLS], mx, IsWorstCase>; +} + +foreach mx = SchedMxList in { + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + defm "" : LMULWriteResMX<"WriteVLDM", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTM", [AscalonLS], mx, IsWorstCase>; +} + +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesLMUL.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLDS8", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX8", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX8", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS8", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX8", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>; + } +} +foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in { + defvar Cycles = AscalonGetCyclesLMUL.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLDS16", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX16", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX16", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS16", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX16", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>; + } +} +foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in { + defvar Cycles = AscalonGetCyclesLMUL.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLDS32", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX32", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX32", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS32", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX32", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>; + } +} +foreach mx = ["M1", "M2", "M4", "M8"] in { + defvar Cycles = AscalonGetCyclesLMUL.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLDS64", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX64", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX64", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS64", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX64", [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX64", [AscalonLS], mx, IsWorstCase>; + } +} + +// VLD*R is LMUL aware +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +// VST*R is LMUL aware +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Segmented Loads and Stores +foreach mx = SchedMxList in { + foreach eew = [8, 16, 32, 64] in { + foreach nf=2-8 in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + } + let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + } + } +} +foreach mx = SchedMxList in { + foreach nf=2-8 in { + foreach eew = [8, 16, 32, 64] in { + defvar Cycles = AscalonGetCyclesStridedSegmented.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles in { + defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>; + } + } + } +} + +// Vector Fixed-Point Arithmetic Instructions +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulV", [AscalonFXA, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [AscalonFXA, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = AscalonGetCyclesNarrowing.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVNClipV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} + +// Configuration-Setting Instructions +def : ReadAdvance; +def : ReadAdvance; + +// Vector Loads and Stores +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTEV", 0>; +defm "" : LMULReadAdvance<"ReadVSTM", 0>; +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTS8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS64V", 0>; +defm "" : LMULReadAdvance<"ReadVLDUXV", 0>; +defm "" : LMULReadAdvance<"ReadVLDOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTUXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Vector Integer Arithmetic Instructions +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVIALUV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVExtV", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDivOrSqrt.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [AscalonFX, AscalonV], mx, sew, IsWorstCase>; + } + } +} + +// Widening +foreach mx = SchedMxListW in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVIWALUV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = AscalonGetCyclesNarrowing.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} + +// Vector Floating-Point Instructions +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + } + } +} +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFClassV", [AscalonFP, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMergeV", [AscalonFP, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovV", [AscalonFP, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpV", [AscalonFP, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpF", [AscalonFP, AscalonV], mx, IsWorstCase>; + } +} +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDivOrSqrt.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + } + } +} + +// Widening +foreach mx = SchedMxListW in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>; + } +} +foreach mx = SchedMxListFW in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>; + } + } + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>; +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = AscalonGetCyclesNarrowing.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>; + } +} +foreach mx = SchedMxListFW in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesNarrowing.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>; + } + } +} + +// Vector Reduction Instructions +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListWRed in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + } +} + +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet.val in { + defvar RedCycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = RedCycles, ReleaseAtCycles = [1, RedCycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + } + defvar OrdRedCycles = AscalonGetCyclesLMUL.c; + let Latency = OrdRedCycles, ReleaseAtCycles = [1, OrdRedCycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + } +} + +foreach mx = SchedMxListFWRed in { + foreach sew = SchedSEWSet.val in { + defvar RedCycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = RedCycles, ReleaseAtCycles = [1, RedCycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + defvar OrdRedCycles = AscalonGetCyclesLMUL.c; + let Latency = OrdRedCycles, ReleaseAtCycles = [1, OrdRedCycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [AscalonFX, AscalonV], + mx, sew, IsWorstCase>; + } +} + +// Vector Mask Instructions +foreach mx = SchedMxList in { + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + defm "" : LMULWriteResMX<"WriteVMALUV", [AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMSFSV", [AscalonV], mx, IsWorstCase>; + let Latency = 2, ReleaseAtCycles = [1, 2] in { + defm "" : LMULWriteResMX<"WriteVMPopV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMFFSV", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in { + defm "" : LMULWriteResMX<"WriteVIotaV", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIdxV", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} + +// Vector Permutation Instructions +let Latency = 2, ReleaseAtCycles = [1, 2] in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = !mul(Cycles, 2), ReleaseAtCycles = [Cycles, !mul(Cycles, 2)] in { + defm "" : LMULWriteResMX<"WriteVRGatherVX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} + +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = AscalonGetCyclesVRGatherVV.c; + defvar IsWorstCase = AscalonIsWorstCaseMXSEW.c; + let Latency = !add(Cycles, 3), ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxList in { + defvar Cycles = AscalonGetCyclesDefault.c; + defvar IsWorstCase = AscalonIsWorstCaseMX.c; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSlideUpX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSlideDownX", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSlideI", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlide1X", [AscalonFX, AscalonV], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [AscalonFX, AscalonV], mx, IsWorstCase>; + } +} + +// Whole vector register move, vmv.v, LMUL aware +let Latency = 1, ReleaseAtCycles = [1] in + def : WriteRes; +let Latency = 2, ReleaseAtCycles = [2] in + def : WriteRes; +let Latency = 4, ReleaseAtCycles = [4] in + def : WriteRes; +let Latency = 8, ReleaseAtCycles = [8] in + def : WriteRes; + +// Vector Integer Arithmetic Instructions +defm : LMULReadAdvance<"ReadVIALUV", 0>; +defm : LMULReadAdvance<"ReadVIALUX", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUV", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUX", 0>; +defm : LMULReadAdvance<"ReadVExtV", 0>; +defm : LMULReadAdvance<"ReadVICALUV", 0>; +defm : LMULReadAdvance<"ReadVICALUX", 0>; +defm : LMULReadAdvance<"ReadVShiftV", 0>; +defm : LMULReadAdvance<"ReadVShiftX", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftV", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftX", 0>; +defm : LMULReadAdvance<"ReadVICmpV", 0>; +defm : LMULReadAdvance<"ReadVICmpX", 0>; +defm : LMULReadAdvance<"ReadVIMinMaxV", 0>; +defm : LMULReadAdvance<"ReadVIMinMaxX", 0>; +defm : LMULReadAdvance<"ReadVIMulV", 0>; +defm : LMULReadAdvance<"ReadVIMulX", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivV", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulX", 0>; +defm : LMULReadAdvance<"ReadVIMulAddV", 0>; +defm : LMULReadAdvance<"ReadVIMulAddX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>; +defm : LMULReadAdvance<"ReadVIMergeV", 0>; +defm : LMULReadAdvance<"ReadVIMergeX", 0>; +defm : LMULReadAdvance<"ReadVIMovV", 0>; +defm : LMULReadAdvance<"ReadVIMovX", 0>; + +// Vector Fixed-Point Arithmetic Instructions +defm "" : LMULReadAdvance<"ReadVSALUV", 0>; +defm "" : LMULReadAdvance<"ReadVSALUX", 0>; +defm "" : LMULReadAdvance<"ReadVAALUV", 0>; +defm "" : LMULReadAdvance<"ReadVAALUX", 0>; +defm "" : LMULReadAdvance<"ReadVSMulV", 0>; +defm "" : LMULReadAdvance<"ReadVSMulX", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftV", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftX", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>; + +// Vector Floating-Point Instructions +defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; +defm "" : LMULReadAdvance<"ReadVFClassV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; +defm "" : LMULReadAdvance<"ReadVFMovF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>; +defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; + +// Vector Reduction Instructions +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Vector Mask Instructions +defm "" : LMULReadAdvance<"ReadVMALUV", 0>; +defm "" : LMULReadAdvance<"ReadVMPopV", 0>; +defm "" : LMULReadAdvance<"ReadVMFFSV", 0>; +defm "" : LMULReadAdvance<"ReadVMSFSV", 0>; +defm "" : LMULReadAdvance<"ReadVIotaV", 0>; + +// Vector Permutation Instructions +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVISlideV", 0>; +defm "" : LMULReadAdvance<"ReadVISlideX", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideV", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideF", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Others +def : ReadAdvance; +def : ReadAdvance; +foreach mx = SchedMxList in { + def : ReadAdvance("ReadVPassthru_" # mx), 0>; + foreach sew = SchedSEWSet.val in + def : ReadAdvance("ReadVPassthru_" # mx # "_E" # sew), 0>; +} + //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedQ; -defm : UnsupportedSchedV; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s index 9fd16e1ffc1d6..7f44bd0eccdcb 100644 --- a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s @@ -47,33 +47,35 @@ fsqrt.d ft2, fa3 # CHECK-NEXT: 1 12 6.00 fsqrt.d ft2, fa3 # CHECK: Resources: -# CHECK-NEXT: [0.0] - AscalonFP -# CHECK-NEXT: [0.1] - AscalonFP -# CHECK-NEXT: [1] - AscalonFXA -# CHECK-NEXT: [2] - AscalonFXB -# CHECK-NEXT: [3.0] - AscalonFXC -# CHECK-NEXT: [3.1] - AscalonFXC -# CHECK-NEXT: [4.0] - AscalonFXD -# CHECK-NEXT: [4.1] - AscalonFXD -# CHECK-NEXT: [5.0] - AscalonLS -# CHECK-NEXT: [5.1] - AscalonLS -# CHECK-NEXT: [5.2] - AscalonLS +# CHECK-NEXT: [0] - AscalonFPA +# CHECK-NEXT: [1] - AscalonFPB +# CHECK-NEXT: [2] - AscalonFXA +# CHECK-NEXT: [3] - AscalonFXB +# CHECK-NEXT: [4.0] - AscalonFXC +# CHECK-NEXT: [4.1] - AscalonFXC +# CHECK-NEXT: [5.0] - AscalonFXD +# CHECK-NEXT: [5.1] - AscalonFXD +# CHECK-NEXT: [6.0] - AscalonLS +# CHECK-NEXT: [6.1] - AscalonLS +# CHECK-NEXT: [6.2] - AscalonLS +# CHECK-NEXT: [7] - AscalonVA +# CHECK-NEXT: [8] - AscalonVB # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] -# CHECK-NEXT: 18.00 28.00 - - - - - - - - - +# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] [5.0] [5.1] [6.0] [6.1] [6.2] [7] [8] +# CHECK-NEXT: 18.00 28.00 - - - - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: -# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.s ft0, fa0, fa1 -# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.s ft1, fa0, fa1 -# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.d ft2, ft4, ft5 -# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.d ft3, ft4, ft5 -# CHECK-NEXT: - 1.00 - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 -# CHECK-NEXT: - 1.00 - - - - - - - - - fmul.s fs3, fs3, fs4 -# CHECK-NEXT: 7.00 - - - - - - - - - - fdiv.s fs2, fs3, fs4 -# CHECK-NEXT: 1.00 - - - - - - - - - - fmul.d ft4, ft4, ft5 -# CHECK-NEXT: - 12.00 - - - - - - - - - fdiv.d fs4, fa3, ft5 -# CHECK-NEXT: 7.00 - - - - - - - - - - fsqrt.s ft1, fa2 -# CHECK-NEXT: - 12.00 - - - - - - - - - fsqrt.d ft2, fa3 +# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] [5.0] [5.1] [6.0] [6.1] [6.2] [7] [8] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.s ft1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.d ft3, ft4, ft5 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 7.00 - - - - - - - - - - - - fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul.d ft4, ft4, ft5 +# CHECK-NEXT: - 12.00 - - - - - - - - - - - fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 7.00 - - - - - - - - - - - - fsqrt.s ft1, fa2 +# CHECK-NEXT: - 12.00 - - - - - - - - - - - fsqrt.d ft2, fa3 diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s index 46cb4c6b8ae24..d10cc3f57e970 100644 --- a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s @@ -47,33 +47,35 @@ fcvt.w.s t5, f3 # CHECK-NEXT: 1 1 1.00 fcvt.w.s t5, ft3 # CHECK: Resources: -# CHECK-NEXT: [0.0] - AscalonFP -# CHECK-NEXT: [0.1] - AscalonFP -# CHECK-NEXT: [1] - AscalonFXA -# CHECK-NEXT: [2] - AscalonFXB -# CHECK-NEXT: [3.0] - AscalonFXC -# CHECK-NEXT: [3.1] - AscalonFXC -# CHECK-NEXT: [4.0] - AscalonFXD -# CHECK-NEXT: [4.1] - AscalonFXD -# CHECK-NEXT: [5.0] - AscalonLS -# CHECK-NEXT: [5.1] - AscalonLS -# CHECK-NEXT: [5.2] - AscalonLS +# CHECK-NEXT: [0] - AscalonFPA +# CHECK-NEXT: [1] - AscalonFPB +# CHECK-NEXT: [2] - AscalonFXA +# CHECK-NEXT: [3] - AscalonFXB +# CHECK-NEXT: [4.0] - AscalonFXC +# CHECK-NEXT: [4.1] - AscalonFXC +# CHECK-NEXT: [5.0] - AscalonFXD +# CHECK-NEXT: [5.1] - AscalonFXD +# CHECK-NEXT: [6.0] - AscalonLS +# CHECK-NEXT: [6.1] - AscalonLS +# CHECK-NEXT: [6.2] - AscalonLS +# CHECK-NEXT: [7] - AscalonVA +# CHECK-NEXT: [8] - AscalonVB # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] -# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - +# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] [5.0] [5.1] [6.0] [6.1] [6.2] [7] [8] +# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: -# CHECK-NEXT: - - 1.00 - - - - - - - - mul t0, a0, t0 -# CHECK-NEXT: - - - - - - - 1.00 - - - sub s2, a2, a3 -# CHECK-NEXT: - - 7.00 - - - - - - - - div t1, t2, t3 -# CHECK-NEXT: - - - - - - 1.00 - - - - add t1, a4, gp -# CHECK-NEXT: - - 7.00 - - - - - - - - div a1, a2, a3 -# CHECK-NEXT: - - - - - 1.00 - - - - - add t1, a0, t0 -# CHECK-NEXT: - - 1.00 - - - - - - - - mul s0, s0, a5 -# CHECK-NEXT: - - - - - 1.00 - - - - - add t2, t2, t2 -# CHECK-NEXT: - - - - 1.00 - - - - - - sub s1, s0, s1 -# CHECK-NEXT: - - - 1.00 - - - - - - - fcvt.s.w ft1, t3 -# CHECK-NEXT: - - - - 1.00 - - - - - - add s2, s2, s2 -# CHECK-NEXT: - - 1.00 - - - - - - - - fcvt.w.s t5, ft3 +# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] [5.0] [5.1] [6.0] [6.1] [6.2] [7] [8] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul t0, a0, t0 +# CHECK-NEXT: - - - - - - - 1.00 - - - - - sub s2, a2, a3 +# CHECK-NEXT: - - 7.00 - - - - - - - - - - div t1, t2, t3 +# CHECK-NEXT: - - - - - - 1.00 - - - - - - add t1, a4, gp +# CHECK-NEXT: - - 7.00 - - - - - - - - - - div a1, a2, a3 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t1, a0, t0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul s0, s0, a5 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t2, t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - sub s1, s0, s1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - fcvt.s.w ft1, t3 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - add s2, s2, s2 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - fcvt.w.s t5, ft3 diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/vdiv_vsqrt.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/vdiv_vsqrt.s new file mode 100644 index 0000000000000..53711ed515663 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/vdiv_vsqrt.s @@ -0,0 +1,1016 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=tt-ascalon-d8 -instruction-tables=full -iterations=1 < %s | FileCheck %s + +vsetvli zero, zero, e8, mf8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, mf4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, mf2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf8, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m1, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m2, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m4, tu, mu +vdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m8, tu, mu +vdiv.vv v8, v16, v24 + +vsetvli zero, zero, e8, mf8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, mf4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, mf2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, m2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, m4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e8, m8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, mf8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, mf4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, mf2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, m2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, m4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e16, m8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, mf8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, mf4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, mf2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, m2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, m4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e32, m8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, mf8, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, mf4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, mf2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, m1, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, m2, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, m4, tu, mu +vdiv.vx v8, v16, a0 +vsetvli zero, zero, e64, m8, tu, mu +vdiv.vx v8, v16, a0 + +vsetvli zero, zero, e8, mf8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, mf4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, mf2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e8, m8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, mf2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e16, m8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, mf2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e32, m8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf8, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, mf2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m1, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m2, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m4, tu, mu +vfdiv.vv v8, v16, v24 +vsetvli zero, zero, e64, m8, tu, mu +vfdiv.vv v8, v16, v24 + +vsetvli zero, zero, e8, mf8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, mf4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, mf2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, m2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, m4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e8, m8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, mf8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, mf4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, mf2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, m2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, m4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e16, m8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, mf8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, mf4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, mf2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, m2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, m4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e32, m8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, mf8, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, mf4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, mf2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, m1, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, m2, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, m4, tu, mu +vfdiv.vf v8, v16, fa0 +vsetvli zero, zero, e64, m8, tu, mu +vfdiv.vf v8, v16, fa0 + +vsetvli zero, zero, e8, mf8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, mf4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, mf2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, m2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, m4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e8, m8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, mf8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, mf4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, mf2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, m2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, m4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e16, m8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, mf8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, mf4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, mf2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, m2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, m4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e32, m8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, mf8, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, mf4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, mf2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, m2, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, m4, tu, mu +vfsqrt.v v8, v16 +vsetvli zero, zero, e64, m8, tu, mu +vfsqrt.v v8, v16 + +# CHECK: Resources: +# CHECK-NEXT: [0] - AscalonFP:2 AscalonFPA, AscalonFPB +# CHECK-NEXT: [1] - AscalonFPA:1 +# CHECK-NEXT: [2] - AscalonFPB:1 +# CHECK-NEXT: [3] - AscalonFX:6 AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXC, AscalonFXD, AscalonFXD +# CHECK-NEXT: [4] - AscalonFXA:1 +# CHECK-NEXT: [5] - AscalonFXB:1 +# CHECK-NEXT: [6] - AscalonFXC:2 +# CHECK-NEXT: [7] - AscalonFXD:2 +# CHECK-NEXT: [8] - AscalonLS:3 +# CHECK-NEXT: [9] - AscalonV:2 AscalonVA, AscalonVB +# CHECK-NEXT: [10] - AscalonVA:1 +# CHECK-NEXT: [11] - AscalonVB:1 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) +# CHECK-NEXT: [7]: Bypass Latency +# CHECK-NEXT: [8]: Resources ( | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [,