Skip to content

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Sep 10, 2025

Only the ZMM PACK/PALIGNR instructions are half-rate on znver4 - confirmed with AMD SOG, uops.info and Agner

Noticed because comparing costs table shuffle costs vs llvm-mca costs kept giving weird numbers if I tested it on znver4 vs any other avx2/avx512 target

It looks like there's other znver4 overrides that make this mistake but many of these need cleaning up properly to use the (currently unused) default classes

Only the ZMM PACK/PALIGNR instructions are half-rate on znver4 - confirmed with AMD SOG, uops.info and Agner

Noticed because comparing costs table shuffle costs vs llvm-mca costs kept giving weird numbers if I tested it on znver4 vs x86-64-v4

It looks like there's other znver4 overrides that make this mistake but many of these need cleaning up to use the (currently unused) default classes properly
@RKSimon RKSimon enabled auto-merge (squash) September 10, 2025 14:33
@llvmbot
Copy link
Member

llvmbot commented Sep 10, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Only the ZMM PACK/PALIGNR instructions are half-rate on znver4 - confirmed with AMD SOG, uops.info and Agner

Noticed because comparing costs table shuffle costs vs llvm-mca costs kept giving weird numbers if I tested it on znver4 vs any other avx2/avx512 target

It looks like there's other znver4 overrides that make this mistake but many of these need cleaning up properly to use the (currently unused) default classes


Patch is 57.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157867.diff

7 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+27-11)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s (+11-11)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s (+5-5)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s (+61-61)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-sse2.s (+7-7)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s (+3-3)
  • (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-ssse3.s (+3-3)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index f4b8f8927b1b5..a93c7e3a82f17 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1534,9 +1534,9 @@ def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
   let NumMicroOps = 1;
 }
 def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
-	"VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz", 
+        "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz", 
         "VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri",  "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
-	"VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
+        "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
 	)>;
 
 // SCALE & REDUCE instructions
@@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
   let NumMicroOps = 1;
 }
 def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
-	"VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
+        "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
         "VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
 	)>;
 
@@ -1586,7 +1586,7 @@ def : InstRW<[Zn4WriteSHIFTrr], (instregex
         "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)",
         "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)",
         "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)",
-	"VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int"
+        "VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int"
 	)>;
 
 def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> {
@@ -1598,24 +1598,40 @@ def : InstRW<[Zn4WriteSHIFTri], (instregex
         "VP(SLL|SRL|SRA)(D|Q|W)(Z|Z128|Z256?)(ri|rik|rikz)"
 	)>;
 
-// ALIGN Instructions
-def Zn4WriteALIGN: SchedWriteRes<[Zn4FPFMisc12]> {
+// ALIGNR Instructions
+def Zn4WriteALIGNR: SchedWriteRes<[Zn4FPFMisc12]> {
+  let Latency = 2;
+  let ReleaseAtCycles = [1];
+  let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteALIGNR], (instregex
+        "(V?)PALIGNR(Y?|Z128?|Z256?)(rri|rrik|rrikz)"
+	)>;
+def Zn4WriteALIGNRZ: SchedWriteRes<[Zn4FPFMisc12]> {
   let Latency = 2;
   let ReleaseAtCycles = [2];
   let NumMicroOps = 1;
 }
-def : InstRW<[Zn4WriteALIGN], (instregex
-        "(V?)PALIGNR(Z?|Z128?|Z256?)(rri|rrik|rrikz)"
+def : InstRW<[Zn4WriteALIGNRZ], (instregex
+        "(V?)PALIGNRZ(rri|rrik|rrikz)"
 	)>;
 
-//PACK Instructions
+// PACK Instructions
 def Zn4WritePACK: SchedWriteRes<[Zn4FPFMisc12]> {
   let Latency = 2;
-  let ReleaseAtCycles = [2];
+  let ReleaseAtCycles = [1];
   let NumMicroOps = 1;
 }
 def : InstRW<[Zn4WritePACK], (instregex
-        "(V?)PACK(SS|US)(DW|WB)(Z?|Z128?|Z256?)(rr|rrk|rrkz)"
+        "(V?)PACK(SS|US)(DW|WB)(Y?|Z128?|Z256?)(rr|rrk|rrkz)"
+	)>;
+def Zn4WritePACKZ: SchedWriteRes<[Zn4FPFMisc12]> {
+  let Latency = 2;
+  let ReleaseAtCycles = [2];
+  let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WritePACKZ], (instregex
+        "(V?)PACK(SS|US)(DW|WB)Z(rr|rrk|rrkz)"
 	)>;
 
 // MAX and MIN Instructions
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
index 9b721c933ab51..1ffe53366fdb0 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
@@ -1365,13 +1365,13 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.50    *                   vpabsd	(%rax), %xmm2
 # CHECK-NEXT:  1      2     1.00                        vpabsw	%xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpabsw	(%rax), %xmm2
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      2     1.00                        vpackuswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      2     0.50                        vpackuswb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackuswb	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpaddb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpaddb	(%rax), %xmm1, %xmm2
@@ -1389,7 +1389,7 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.50    *                   vpaddusw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpaddw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpaddw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      2     1.00                        vpalignr	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      2     0.50                        vpalignr	$1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpalignr	$1, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpand	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpand	(%rax), %xmm1, %xmm2
@@ -1749,7 +1749,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 1.33   1.33   1.33   16.50  16.50  16.50  16.50   -     205.25 396.08 270.58 158.08 208.50 208.50 65.00  119.67 119.67 119.67 107.00 107.00 107.00 19.00  19.00
+# CHECK-NEXT: 1.33   1.33   1.33   16.50  16.50  16.50  16.50   -     205.25 393.58 268.08 158.08 208.50 208.50 65.00  119.67 119.67 119.67 107.00 107.00 107.00 19.00  19.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2088,13 +2088,13 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpabsd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vpabsw	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpabsw	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpackssdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpackssdw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpackssdw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpacksswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpacksswb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpacksswb	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpackusdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpackusdw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpackusdw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpackuswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpackuswb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpackuswb	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpaddb	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpaddb	(%rax), %xmm1, %xmm2
@@ -2112,7 +2112,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpaddusw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpaddw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpaddw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpalignr	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpalignr	$1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpalignr	$1, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpand	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpand	(%rax), %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
index 25e367c96e44b..6dc5bacde9059 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
@@ -484,13 +484,13 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpabsd	(%rax), %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpabsw	%ymm0, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpabsw	(%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpackssdw	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpacksswb	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpackusdw	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpackuswb	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      2     0.50                        vpackuswb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpackuswb	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.25                        vpaddb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpaddb	(%rax), %ymm1, %ymm2
@@ -508,7 +508,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpaddusw	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.25                        vpaddw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpaddw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpalignr	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      2     0.50                        vpalignr	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpalignr	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.25                        vpand	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpand	(%rax), %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s
index a298dd69ee9b3..79f2cb4b7ab82 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s
@@ -1166,53 +1166,53 @@ vpunpcklwd         (%rax), %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpabsw	(%rax), %ymm19 {%k1}
 # CHECK-NEXT:  1      1     0.25                        vpabsw	%ymm16, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpabsw	(%rax), %ymm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%xmm16, %xmm17, %xmm19
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%xmm16, %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%xmm16, %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%ymm16, %ymm17, %ymm19
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %ymm17, %ymm19
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%ymm16, %ymm17, %ymm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpackssdw	%ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpackssdw	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpackssdw	(%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%xmm16, %xmm17, %xmm19
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%xmm16, %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%xmm16, %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%ymm16, %ymm17, %ymm19
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %ymm17, %ymm19
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%ymm16, %ymm17, %ymm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpacksswb	%ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpacksswb	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpacksswb	(%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%xmm16, %xmm17, %xmm19
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%xmm16, %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%xmm16, %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%ymm16, %ymm17, %ymm19
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %ymm17, %ymm19
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%ymm16, %ymm17, %ymm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  1      2     1.00                        vpackusdw	%ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      2     0.50                        vpackusdw	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpackusdw	(%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT:  1      2     1.00                        vpackuswb	%xmm16, %xmm17, %xmm19
+# CHECK-NEXT:  1      2     0.50                        vpackuswb	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpackuswb	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      2     1.00                        vpackuswb	%xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  1      2     0.50                        vpackuswb	%xmm16, %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  1      8     0.50    *                   vpackuswb	(%rax), %xmm17, %xmm19 {%...
[truncated]

@RKSimon RKSimon merged commit fa0bb6a into llvm:main Sep 10, 2025
9 checks passed
@RKSimon RKSimon deleted the x86-pack-palignr-xmm-ymm-throughput branch September 10, 2025 15:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants