Skip to content

Commit

Permalink
[X86] Adjust fadd costs to match SoG
Browse files Browse the repository at this point in the history
The znver1/2 models incorrectly modelled these instructions on FPU pipe 0 instead of FPU pipes 2/3; in addition, the znver1 ymm variants require double pumping.

The models now match the numbers from the AMD Software Optimization Guide (SoG), Agner Fog's instruction tables, and instlatx64.

Thanks to @Fabian-R for the report.
  • Loading branch information
RKSimon committed May 15, 2022
1 parent fdae864 commit 896557e
Show file tree
Hide file tree
Showing 16 changed files with 332 additions and 332 deletions.
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Expand Up @@ -276,13 +276,13 @@ defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU23], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU23], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU23], 3, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU23], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU23], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU23], 3, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU01], 1>;
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86ScheduleZnver2.td
Expand Up @@ -275,13 +275,13 @@ defm : X86WriteRes<WriteFMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [Zn2FPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAddY, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU23], 3>;
defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU23], 3>;
defm : Zn2WriteResFpuPair<WriteFAddY, [Zn2FPU23], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU23], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU23], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU23], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU01], 1>;
defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU01], 1>;
Expand Down
130 changes: 65 additions & 65 deletions llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
Expand Up @@ -194,10 +194,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addss %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addss (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addps %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addps (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addss %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andnps %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andps %xmm0, %xmm2
Expand Down Expand Up @@ -306,10 +306,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 * U stmxcsr (%rax)
# CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subss (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subps %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subps (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subss %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subss (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 2 10 1.00 * ucomiss (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 unpckhps %xmm0, %xmm2
Expand All @@ -335,14 +335,14 @@ xorps (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 32.50 32.50 - - - - - 33.00 29.50 26.00 108.50 -
# CHECK-NEXT: 32.50 32.50 - - - - - 25.00 29.50 30.00 112.50 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - 1.00 - - - - addps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - addss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - andnps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - andnps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - andps %xmm0, %xmm2
Expand Down Expand Up @@ -451,10 +451,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - stmxcsr (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - - - subps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - subss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - ucomiss %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - ucomiss (%rax), %xmm1
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - unpckhps %xmm0, %xmm2
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
Expand Up @@ -407,10 +407,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andnpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2
Expand Down Expand Up @@ -662,10 +662,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 27 20.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 20 20.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 27 20.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subsd (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 2 10 1.00 * ucomisd (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 unpckhpd %xmm0, %xmm2
Expand All @@ -691,14 +691,14 @@ xorpd (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 66.50 66.50 - - - - - 64.92 48.42 75.75 153.92 -
# CHECK-NEXT: 66.50 66.50 - - - - - 56.92 48.42 79.75 157.92 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - 1.00 - - - - addpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - addsd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addsd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - andnpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - andnpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - andpd %xmm0, %xmm2
Expand Down Expand Up @@ -950,10 +950,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - subpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - subsd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subsd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - ucomisd %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - ucomisd (%rax), %xmm1
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - unpckhpd %xmm0, %xmm2
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/tools/llvm-mca/X86/Znver1/resources-sse3.s
Expand Up @@ -43,10 +43,10 @@ mwait
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 haddpd %xmm0, %xmm2
# CHECK-NEXT: 1 14 0.50 * haddpd (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 haddps %xmm0, %xmm2
Expand Down Expand Up @@ -81,14 +81,14 @@ mwait

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 4.50 4.50 - - - - - 4.00 2.00 2.00 - -
# CHECK-NEXT: 4.50 4.50 - - - - - - 2.00 4.00 2.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - 1.00 - - - - addsubpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - addsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addsubpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addsubps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - addsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - haddpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - haddpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - haddps %xmm0, %xmm2
Expand Down

0 comments on commit 896557e

Please sign in to comment.