Skip to content

Commit

Permalink
[X86][BtVer2] Improved latency and throughput of float/vector loads and stores.
Browse files Browse the repository at this point in the history

This patch introduces the following changes to the btver2 scheduling model:

- The number of micro opcodes for YMM loads and stores is now 2 (it was
  incorrectly set to 1 for both aligned and misaligned loads/stores).

- Increased the number of AGU resource cycles for YMM loads and stores
  to 2cy (instead of 1cy).

- Removed JFPU01 and JFPX (JVALU for integer vector loads) from the list of
  resources consumed by pure float/vector loads (no MMX).

I verified with llvm-exegesis that pure XMM/YMM loads are no-pipe. Those
are dispatched to the FPU but not really issued on JFPU01.

Differential Revision: https://reviews.llvm.org/D68871

llvm-svn: 374765
  • Loading branch information
Andrea Di Biagio authored and Andrea Di Biagio committed Oct 14, 2019
1 parent 527a35e commit b744abb
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 65 deletions.
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86ScheduleBtVer2.td
Original file line number Diff line number Diff line change
Expand Up @@ -501,14 +501,14 @@ defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;

defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
Expand Down Expand Up @@ -657,16 +657,16 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4], 2>;

defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,17 @@ vmovaps %xmm0, 48(%rdi)

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 4.00 4.00 4.00 - 4.00 4.00 - - -
# CHECK-NEXT: - - - - - - 4.00 4.00 - 4.00 4.00 - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 1.00 1.00 - 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: - - - - 1.00 1.00 - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)

# CHECK: Timeline view:
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ vmovaps %xmm0, 48(%rdi)

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 3.99 4.01 4.00 - 4.00 4.00 - - -
# CHECK-NEXT: - - - - - - 4.00 4.00 - 4.00 4.00 - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 1.00 0.99 0.01 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: - - - - 1.00 1.00 - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)

# CHECK: Timeline view:
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ vmovaps %xmm0, 48(%rdi)

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - -
# CHECK-NEXT: - - - - - - 4.00 4.00 - 4.00 4.00 - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 1.00 0.98 0.02 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: - - - - 1.00 0.99 0.01 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)

# CHECK: Timeline view:
Expand Down
Loading

0 comments on commit b744abb

Please sign in to comment.