Skip to content

Commit

Permalink
[X86] Replace unnecessary SKL CVTSI2SS/CVTSI2SD overrides with better base class defs
Browse files Browse the repository at this point in the history

The folded patterns were missing entirely - confirmed by both Agner + uops.info
  • Loading branch information
RKSimon committed Nov 12, 2022
1 parent 2be46b3 commit fca6364
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 27 deletions.
15 changes: 7 additions & 8 deletions llvm/lib/Target/X86/X86SchedSkylakeClient.td
Expand Up @@ -451,13 +451,15 @@ defm : SKLWriteResPair<WriteCvtPD2I, [SKLPort5,SKLPort01], 5, [1,1], 2, 6>;
defm : SKLWriteResPair<WriteCvtPD2IY, [SKLPort5,SKLPort01], 7, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

-defm : SKLWriteResPair<WriteCvtI2SS, [SKLPort1], 4>;
+defm : X86WriteRes<WriteCvtI2SS, [SKLPort5,SKLPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
defm : SKLWriteResPair<WriteCvtI2PS, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : SKLWriteResPair<WriteCvtI2SD, [SKLPort1], 4>;
-defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort0,SKLPort5], 5, [1,1], 2, 6>;
-defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort0,SKLPort5], 7, [1,1], 2, 6>;
+defm : X86WriteRes<WriteCvtI2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort0,SKLPort5], 5, [1,1], 2, 6>;
+defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort0,SKLPort5], 7, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : X86WriteRes<WriteCvtSS2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
Expand Down Expand Up @@ -928,10 +930,7 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr",
-"(V?)CVTSI642SDrr",
-"(V?)CVTSI2SDrr",
-"(V?)CVTSI2SSrr")>;
+def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr")>;

def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
let Latency = 5;
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
Expand Up @@ -1146,12 +1146,12 @@ vzeroupper
# CHECK-NEXT: 3 10 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2
-# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
-# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vcvtsi2sdl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vcvtsi2sdq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: 3 6 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2
-# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
-# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vcvtsi2ssl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx
Expand Down Expand Up @@ -1736,7 +1736,7 @@ vzeroupper

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 336.58 201.58 173.17 173.17 34.00 326.58 5.25 12.67
+# CHECK-NEXT: - 126.00 338.58 199.58 173.17 173.17 34.00 326.58 5.25 12.67

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand Down Expand Up @@ -1856,12 +1856,12 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
Expand Up @@ -214,8 +214,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 9 0.50 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 2 5 1.00 cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: 3 6 2.00 cvtsi2ss %rcx, %xmm2
-# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
-# CHECK-NEXT: 2 9 1.00 * cvtsi2ssq (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtsi2ssq (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx
Expand Down Expand Up @@ -333,7 +333,7 @@ xorps (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 71.83 24.83 32.00 32.00 8.00 31.83 0.50 3.00
+# CHECK-NEXT: - 24.00 72.83 23.83 32.00 32.00 8.00 31.83 0.50 3.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand All @@ -357,8 +357,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
Expand Up @@ -446,8 +446,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: 2 5 1.00 cvtsi2sd %rcx, %xmm2
-# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
-# CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtsi2sdq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtss2sd (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvttpd2dq %xmm0, %xmm2
Expand Down Expand Up @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 40.00 112.58 80.58 63.50 63.50 14.00 93.58 2.25 5.00
+# CHECK-NEXT: - 40.00 113.58 79.58 63.50 63.50 14.00 93.58 2.25 5.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand Down Expand Up @@ -732,8 +732,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %rcx, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtss2sd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2dq %xmm0, %xmm2
Expand Down

0 comments on commit fca6364

Please sign in to comment.