Skip to content

Commit

Permalink
[X86] Fix SLM FP<->INT throughputs.
Browse files Browse the repository at this point in the history
Noticed while trying to clean up the shift cost model for SSE4 targets using the script in D10369 - SLM double-pumps all the 128-bit vector conversion ops and only uses the FP0 pipe - numbers taken from Intel AOM + Agner.
  • Loading branch information
RKSimon committed Jul 22, 2021
1 parent 1cda1e6 commit d073b19
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 132 deletions.
36 changes: 18 additions & 18 deletions llvm/lib/Target/X86/X86ScheduleSLM.td
Expand Up @@ -284,31 +284,31 @@ defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV0], 4, [2]>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV0], 4, [2]>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
Expand Down
74 changes: 37 additions & 37 deletions llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
Expand Up @@ -208,24 +208,24 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 6 1.00 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 1 6 1.00 * comiss (%rax), %xmm1
# CHECK-NEXT: 1 4 0.50 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 7 1.00 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 1 4 0.50 cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: 1 4 0.50 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 1 7 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 1 7 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 cvtss2si %xmm0, %ecx
# CHECK-NEXT: 1 4 0.50 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 1 7 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 1 7 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 1 4 0.50 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 1 7 1.00 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 1 4 0.50 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 1 4 0.50 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 1 7 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 1 7 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 5 2.00 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: 1 8 2.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 8 2.00 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 1 5 2.00 cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: 1 5 2.00 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 1 8 2.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 1 8 2.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvtss2si %xmm0, %ecx
# CHECK-NEXT: 1 5 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 1 5 2.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 1 8 2.00 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2
# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
Expand Down Expand Up @@ -331,7 +331,7 @@ xorps (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - 232.00 8.00 61.00 46.00 0.50 0.50 67.00
# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
Expand All @@ -349,24 +349,24 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 cmpeqss (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - comiss %xmm0, %xmm1
# CHECK-NEXT: - - - - 1.00 - - 1.00 comiss (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtps2pi (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - - 0.50 0.50 - - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtss2si (%rax), %ecx
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvtss2si (%rax), %rcx
# CHECK-NEXT: - - - 0.50 0.50 - - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - - 0.50 0.50 - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - - 0.50 0.50 - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvttss2si (%rax), %ecx
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 cvttss2si (%rax), %rcx
# CHECK-NEXT: - - - 2.00 - - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 - - - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 cvtps2pi (%rax), %mm2
# CHECK-NEXT: - - - 2.00 - - - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - - 2.00 - - - - cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - - 1.00 - - - 1.00 cvtss2si (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - 1.00 cvtss2si (%rax), %rcx
# CHECK-NEXT: - - - 2.00 - - - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - - 1.00 - - - 1.00 cvttss2si (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - 1.00 cvttss2si (%rax), %rcx
# CHECK-NEXT: - 39.00 - 1.00 - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 39.00 - 1.00 - - - 1.00 divps (%rax), %xmm2
# CHECK-NEXT: - 17.00 - 1.00 - - - - divss %xmm0, %xmm2
Expand Down

0 comments on commit d073b19

Please sign in to comment.