Skip to content

Commit

Permalink
[X86] HSW/BDW - vector splat shifts don't use Port5 when loading the shift amount
Browse files Browse the repository at this point in the history

Noticed while trying to compare splat vs per-element shift perf stats for #39424

Confirmed with uops.info
  • Loading branch information
RKSimon committed Mar 25, 2024
1 parent edfa97a commit 3dcf62b
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 72 deletions.
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Original file line number Diff line number Diff line change
Expand Up @@ -471,9 +471,11 @@ defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.

// Vector integer shifts.
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>;
defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
defm : X86WriteRes<WriteVecShift, [BWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [BWPort0,BWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Original file line number Diff line number Diff line change
Expand Up @@ -469,10 +469,12 @@ defm : HWWriteResPair<WritePSADBWZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;

// Vector integer shifts.
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
defm : X86WriteRes<WriteVecShift, [HWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteVecShiftLd, [HWPort0,HWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [HWPort0,HWPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [HWPort0,HWPort23], 8, [1,1], 2>; // Unsupported = 1

Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
Original file line number Diff line number Diff line change
Expand Up @@ -1564,30 +1564,30 @@ vzeroupper
# CHECK-NEXT: 2 6 0.50 * vpsignw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsllw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsrad %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsraw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: 2 2 1.00 vpsrlw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 1.00 * vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpsubb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubd %xmm0, %xmm1, %xmm2
Expand Down Expand Up @@ -1736,7 +1736,7 @@ vzeroupper

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 432.25 2.25 12.67
# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 424.25 2.25 12.67

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand Down Expand Up @@ -2274,30 +2274,30 @@ vzeroupper
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpsignw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsllw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrad %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsraw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrlw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpsubb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpsubb (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpsubd %xmm0, %xmm1, %xmm2
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
Original file line number Diff line number Diff line change
Expand Up @@ -596,30 +596,30 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 * pshuflw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2
# CHECK-NEXT: 2 2 1.00 pslld %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * pslld (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * pslld (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psllq %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psllq (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psllq (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psllw %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psllw (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psllw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrad %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psrad (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psrad (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psraw %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psraw (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psraw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrld %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psrld (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psrld (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrlq %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psrlq (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psrlq (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrlw %xmm0, %xmm2
# CHECK-NEXT: 3 7 1.00 * psrlw (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * psrlw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2
# CHECK-NEXT: 2 6 0.50 * psubb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2
Expand Down Expand Up @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 127.25 2.25 4.67
# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand Down Expand Up @@ -882,30 +882,30 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshuflw $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pslld $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pslld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - pslld (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pslld (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - pslldq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psllq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psllq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psllq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psllq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psllw $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psllw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psllw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psllw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psrad $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrad %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrad (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrad (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psraw $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psraw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psraw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psraw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psrld $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrld (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrld (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - psrldq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psrlq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrlq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrlq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrlq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - psrlw $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrlw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrlw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrlw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - psubb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - psubb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - psubd %xmm0, %xmm2
Expand Down
Loading

0 comments on commit 3dcf62b

Please sign in to comment.