Skip to content

Commit

Permalink
[X86] Adjust vector shift costs to match SoG (Issue #54889)
Browse files Browse the repository at this point in the history
The znver1/2 models were incorrectly modelling the FPU pipe used by vector shifts (it should be pipe 2 for shift-by-scalar-amount and pipe 1 for shift-by-element-amount); the znver1 ymm variants also require double pumping.

The costs now match the numbers from the AMD Software Optimization Guide (SoG), Agner Fog's instruction tables and instlatx64.

Thanks to @Fabian-R for the report.
  • Loading branch information
RKSimon committed May 29, 2022
1 parent 9080e21 commit c996904
Show file tree
Hide file tree
Showing 12 changed files with 329 additions and 331 deletions.
18 changes: 8 additions & 10 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Expand Up @@ -398,14 +398,17 @@ defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;

defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU2], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU1], 3, [2], 1>;
defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU1], 3, [4], 2>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
Expand Down Expand Up @@ -444,11 +447,6 @@ defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;

// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

// Vector insert/extract operations.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;

Expand Down
16 changes: 7 additions & 9 deletions llvm/lib/Target/X86/X86ScheduleZnver2.td
Expand Up @@ -393,14 +393,17 @@ defm : X86WriteRes<WriteVecMoveToGpr, [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;

defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU1], 3, [2], 1>;
defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU1], 3, [2], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : Zn2WriteResFpuPair<WriteVecLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicX, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicY, [Zn2FPU], 1>;
Expand Down Expand Up @@ -439,11 +442,6 @@ defm : Zn2WriteResFpuPair<WritePSADBWY, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;

// Vector Shift Operations
defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 3>;
defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

// Vector insert/extract operations.
defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;

Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
Expand Up @@ -1562,30 +1562,30 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpsignd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsignw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsignw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsubb %xmm0, %xmm1, %xmm2
Expand Down Expand Up @@ -1738,7 +1738,7 @@ vzeroupper

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 175.00 175.00 - - - - - 146.58 171.08 198.25 527.08 -
# CHECK-NEXT: 175.00 175.00 - - - - - 144.58 169.08 204.25 525.08 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
Expand Down Expand Up @@ -2274,30 +2274,30 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsignw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrad %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsraw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsubb %xmm0, %xmm1, %xmm2
Expand Down

0 comments on commit c996904

Please sign in to comment.