Skip to content

Commit

Permalink
[X86][SLM] Fix HADD/HSUB uops, latency and throughput
Browse files Browse the repository at this point in the history
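Noticed while trying to improve the generic reduction costs using the helper script from D103695. The new values were confirmed against the Intel AoM (Architectures Optimization Manual), Agner Fog's instruction tables, and InstLatX64.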
  • Loading branch information
RKSimon committed Sep 11, 2021
1 parent 51d04e2 commit 484944a
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 71 deletions.
10 changes: 5 additions & 5 deletions llvm/lib/Target/X86/X86ScheduleSLM.td
Expand Up @@ -420,12 +420,12 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV1], 6, [6], 4, 1>;
defm : X86WriteResPairUnsupported<WriteFHAddY>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 6, [6], 3, 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 6, [6], 3, 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// String instructions.
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
Expand Up @@ -47,14 +47,14 @@ mwait
# CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2
# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2
# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * hsubps (%rax), %xmm2
# CHECK-NEXT: 4 6 6.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 5 9 6.00 * haddpd (%rax), %xmm2
# CHECK-NEXT: 4 6 6.00 haddps %xmm0, %xmm2
# CHECK-NEXT: 5 9 6.00 * haddps (%rax), %xmm2
# CHECK-NEXT: 4 6 6.00 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 5 9 6.00 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 4 6 6.00 hsubps %xmm0, %xmm2
# CHECK-NEXT: 5 9 6.00 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 100 1.00 U monitor
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
Expand All @@ -77,22 +77,22 @@ mwait

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - - - 32.00 30.00 - - 10.00
# CHECK-NEXT: - - - 8.00 54.00 - - 10.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - - 2.00 - - - addsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - 6.00 - - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - - - 6.00 - - 1.00 haddpd (%rax), %xmm2
# CHECK-NEXT: - - - - 6.00 - - - haddps %xmm0, %xmm2
# CHECK-NEXT: - - - - 6.00 - - 1.00 haddps (%rax), %xmm2
# CHECK-NEXT: - - - - 6.00 - - - hsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - 6.00 - - - hsubps %xmm0, %xmm2
# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - monitor
# CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2
Expand Down
98 changes: 49 additions & 49 deletions llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
Expand Up @@ -122,30 +122,30 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phaddd %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phaddd %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phaddsw %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phaddw %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phaddw %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phsubd %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phsubd %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phsubsw %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 3 6 3.00 phsubw %mm0, %mm2
# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %mm2
# CHECK-NEXT: 3 6 3.00 phsubw %xmm0, %xmm2
# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2
Expand Down Expand Up @@ -183,7 +183,7 @@ psignw (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - - - 52.00 24.00 - - 32.00
# CHECK-NEXT: - - - 112.00 84.00 - - 32.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
Expand All @@ -203,30 +203,30 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - palignr $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %mm0, %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %mm2
# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %xmm0, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - - 2.00 - - - - pmaddubsw %xmm0, %xmm2
Expand Down

0 comments on commit 484944a

Please sign in to comment.