From 4ae7b58c02a81085f2fc6d287dbae3c7c5b4a5a1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 10 Sep 2025 15:24:08 +0100 Subject: [PATCH] [X86] Fix throughput typo in XMM/YMM PACK/PALIGNR schedule classes Only the ZMM PACK/PALIGNR instructions are half-rate on znver4 - confirmed with AMD SOG, uops.info and Agner. Noticed because comparing cost-table shuffle costs vs llvm-mca costs kept giving weird numbers if I tested it on znver4 vs x86-64-v4. It looks like there are other znver4 overrides that make this mistake, but many of these need cleaning up to use the (currently unused) default classes properly. --- llvm/lib/Target/X86/X86ScheduleZnver4.td | 38 ++++-- .../llvm-mca/X86/Znver4/resources-avx1.s | 22 ++-- .../llvm-mca/X86/Znver4/resources-avx2.s | 10 +- .../X86/Znver4/resources-avx512bwvl.s | 122 +++++++++--------- .../llvm-mca/X86/Znver4/resources-sse2.s | 14 +- .../llvm-mca/X86/Znver4/resources-sse41.s | 6 +- .../llvm-mca/X86/Znver4/resources-ssse3.s | 6 +- 7 files changed, 117 insertions(+), 101 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index f4b8f8927b1b5..a93c7e3a82f17 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1534,9 +1534,9 @@ def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> { let NumMicroOps = 1; } def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex - "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz", + "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz", "VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri", "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)", - "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz" + "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz" )>; // SCALE & REDUCE instructions @@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> { let
NumMicroOps = 1; } def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex - "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)", + "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)", "VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)" )>; @@ -1586,7 +1586,7 @@ def : InstRW<[Zn4WriteSHIFTrr], (instregex "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)", "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)", "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)", - "VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int" + "VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int" )>; def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> { @@ -1598,24 +1598,40 @@ def : InstRW<[Zn4WriteSHIFTri], (instregex "VP(SLL|SRL|SRA)(D|Q|W)(Z|Z128|Z256?)(ri|rik|rikz)" )>; -// ALIGN Instructions -def Zn4WriteALIGN: SchedWriteRes<[Zn4FPFMisc12]> { +// ALIGNR Instructions +def Zn4WriteALIGNR: SchedWriteRes<[Zn4FPFMisc12]> { + let Latency = 2; + let ReleaseAtCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn4WriteALIGNR], (instregex + "(V?)PALIGNR(Y?|Z128?|Z256?)(rri|rrik|rrikz)" + )>; +def Zn4WriteALIGNRZ: SchedWriteRes<[Zn4FPFMisc12]> { let Latency = 2; let ReleaseAtCycles = [2]; let NumMicroOps = 1; } -def : InstRW<[Zn4WriteALIGN], (instregex - "(V?)PALIGNR(Z?|Z128?|Z256?)(rri|rrik|rrikz)" +def : InstRW<[Zn4WriteALIGNRZ], (instregex + "(V?)PALIGNRZ(rri|rrik|rrikz)" )>; -//PACK Instructions +// PACK Instructions def Zn4WritePACK: SchedWriteRes<[Zn4FPFMisc12]> { let Latency = 2; - let ReleaseAtCycles = [2]; + let ReleaseAtCycles = [1]; let NumMicroOps = 1; } def : InstRW<[Zn4WritePACK], (instregex - "(V?)PACK(SS|US)(DW|WB)(Z?|Z128?|Z256?)(rr|rrk|rrkz)" + "(V?)PACK(SS|US)(DW|WB)(Y?|Z128?|Z256?)(rr|rrk|rrkz)" + )>; +def Zn4WritePACKZ: SchedWriteRes<[Zn4FPFMisc12]> { + let Latency = 2; + let ReleaseAtCycles = [2]; + let NumMicroOps = 1; +} +def : InstRW<[Zn4WritePACKZ], (instregex + "(V?)PACK(SS|US)(DW|WB)Z(rr|rrk|rrkz)" )>; // MAX and MIN Instructions diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s index 9b721c933ab51..1ffe53366fdb0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s @@ -1365,13 +1365,13 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpabsd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 vpabsw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %xmm2 -# CHECK-NEXT: 1 2 1.00 vpackssdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpackssdw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 1.00 vpacksswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpacksswb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 1.00 vpackusdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpackusdw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 1.00 vpackuswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpackuswb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpaddb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpaddb (%rax), %xmm1, %xmm2 @@ -1389,7 +1389,7 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpaddusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpand (%rax), %xmm1, %xmm2 @@ -1749,7 +1749,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 396.08 270.58 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 +# CHECK-NEXT: 
1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 393.58 268.08 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2088,13 +2088,13 @@ vzeroupper # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpabsw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 
0.25 - - - - - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddb (%rax), %xmm1, %xmm2 @@ -2112,7 +2112,7 @@ vzeroupper # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpand (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s index 25e367c96e44b..6dc5bacde9059 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s @@ -484,13 +484,13 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpabsd (%rax), %ymm2 # CHECK-NEXT: 1 1 0.50 vpabsw %ymm0, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vpackssdw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vpackssdw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpacksswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vpacksswb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpackusdw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vpackusdw %ymm0, %ymm1, 
%ymm2 # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpackuswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vpackuswb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpaddb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpaddb (%rax), %ymm1, %ymm2 @@ -508,7 +508,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpaddusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpaddw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpalignr $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpand %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpand (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s index a298dd69ee9b3..79f2cb4b7ab82 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s @@ -1166,53 +1166,53 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 1 0.25 vpabsw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackssdw %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 2 0.50 vpackssdw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 2 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackssdw %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 2 0.50 vpackssdw 
%ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1 2 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpacksswb %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 2 0.50 vpacksswb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 2 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpacksswb %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 2 0.50 vpacksswb %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1 2 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackusdw %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 2 0.50 vpackusdw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 2 1.00 vpackusdw %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackusdw %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackusdw 
%xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackusdw %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 2 0.50 vpackusdw %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1 2 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackuswb %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 2 0.50 vpackuswb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 2 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpackuswb %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 2 0.50 vpackuswb %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1 2 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.25 vpaddb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpaddb (%rax), %xmm17, %xmm19 @@ -1286,17 +1286,17 @@ 
vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.25 vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1 2 1.00 vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 2 1.00 vpavgb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpavgb (%rax), %xmm17, %xmm19 @@ -2048,7 +2048,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - 4.00 4.00 4.00 4.00 - 233.00 411.50 300.50 140.00 226.00 226.00 8.00 150.67 150.67 150.67 148.00 148.00 148.00 4.00 4.00 +# CHECK-NEXT: - - - 4.00 4.00 4.00 4.00 - 233.00 396.50 285.50 140.00 226.00 226.00 8.00 
150.67 150.67 150.67 148.00 148.00 148.00 4.00 4.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2120,53 +2120,53 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpabsw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsw (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), 
%ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 
0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 # 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddb (%rax), %xmm17, %xmm19 @@ -2240,17 +2240,17 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpavgb (%rax), %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse2.s index dde829373465b..f9f02465bc7d5 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse2.s @@ -516,11 +516,11 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 10 0.50 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 orpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * orpd (%rax), %xmm2 -# CHECK-NEXT: 1 2 1.00 packssdw %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 packssdw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * packssdw (%rax), %xmm2 -# CHECK-NEXT: 1 2 1.00 packsswb %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 packsswb %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * packsswb (%rax), %xmm2 -# CHECK-NEXT: 1 2 1.00 packuswb %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 packuswb %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * packuswb (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 paddb %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * paddb (%rax), %xmm2 @@ -702,7 +702,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 1.00 1.00 1.00 25.00 25.00 25.00 25.00 - 58.25 140.25 81.25 53.25 72.00 72.00 12.00 54.67 54.67 54.67 39.33 39.33 39.33 8.00 8.00 +# CHECK-NEXT: 1.00 1.00 1.00 25.00 25.00 25.00 25.00 - 58.25 138.75 79.75 53.25 72.00 72.00 12.00 54.67 54.67 54.67 39.33 39.33 39.33 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -815,11 +815,11 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - orpd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - orpd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - packssdw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - 
- - - - - - - packssdw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packssdw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - packsswb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packsswb %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packsswb (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - packuswb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packuswb %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packuswb (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddb %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddb (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s index b0321c13f48c5..7f5c50c8b93ab 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s @@ -174,7 +174,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * movntdqa (%rax), %xmm2 # CHECK-NEXT: 4 4 2.00 mpsadbw $1, %xmm0, %xmm2 # CHECK-NEXT: 6 11 2.00 * mpsadbw $1, (%rax), %xmm2 -# CHECK-NEXT: 1 2 1.00 packusdw %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 packusdw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * packusdw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pblendvb %xmm0, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pblendvb %xmm0, (%rax), %xmm2 @@ -279,7 +279,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 31.00 43.50 28.50 16.00 35.50 35.50 7.00 16.33 16.33 16.33 14.67 14.67 14.67 
2.50 2.50 +# CHECK-NEXT: - - - - - - - - 31.00 43.00 28.00 16.00 35.50 35.50 7.00 16.33 16.33 16.33 14.67 14.67 14.67 2.50 2.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -302,7 +302,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movntdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - mpsadbw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mpsadbw $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - packusdw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packusdw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packusdw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pblendvb %xmm0, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-ssse3.s index 173c72171aafe..2feee84efa8ec 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-ssse3.s @@ -120,7 +120,7 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * pabsw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 palignr $1, %mm0, %mm2 # CHECK-NEXT: 1 8 0.50 * palignr $1, (%rax), %mm2 -# CHECK-NEXT: 1 2 1.00 palignr $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * palignr $1, (%rax), %xmm2 # CHECK-NEXT: 3 2 2.00 phaddd %mm0, %mm2 # CHECK-NEXT: 4 9 2.00 * phaddd (%rax), %mm2 @@ -198,7 +198,7 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # 
CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 62.50 15.00 9.00 8.50 16.00 16.00 - 10.67 10.67 10.67 10.67 10.67 10.67 - - +# CHECK-NEXT: - - - - - - - - 62.50 14.50 8.50 8.50 16.00 16.00 - 10.67 10.67 10.67 10.67 10.67 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -216,7 +216,7 @@ psignw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - palignr $1, %mm0, %mm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - palignr $1, (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - palignr $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - palignr $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - palignr $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddd %mm0, %mm2 # CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddd (%rax), %mm2