Skip to content

Commit

Permalink
[X86] Add WriteCRC32 scheduler class
Browse files Browse the repository at this point in the history
Currently, CRC32 instructions use the WriteFAdd class. This patch splits them off into their own class (WriteCRC32); at the moment it is still mostly just a duplicate of WriteFAdd, but it can now be tweaked on a target-by-target basis.

Differential Revision: https://reviews.llvm.org/D44647

llvm-svn: 328582
  • Loading branch information
RKSimon committed Mar 26, 2018
1 parent 78fdca3 commit 28e7bcb
Show file tree
Hide file tree
Showing 11 changed files with 21 additions and 30 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -7074,14 +7074,14 @@ class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
Sched<[WriteFAdd]>;
Sched<[WriteCRC32]>;

class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
X86MemOperand x86memop, SDPatternOperator Int> :
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>;
IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Expand Up @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [BWPort4]>;
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
defm : BWWriteResPair<WriteIDiv, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.

def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Expand Up @@ -115,6 +115,7 @@ defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;

// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Target/X86/X86SchedSandyBridge.td
Expand Up @@ -106,6 +106,7 @@ def : WriteRes<WriteIMulH, []> { let Latency = 3; }

defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;

// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
Expand Down Expand Up @@ -678,8 +679,6 @@ def: InstRW<[SBWriteResGroup21], (instrs MUL8r, IMUL16rr, IMUL32rr, IMUL32rri, I
def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
"ADD_FST0r",
"ADD_FrST0",
"CRC32r(16|32|64)r8",
"CRC32r(16|32|64)r64",
"MMX_CVTPI2PSirr",
"MMX_CVTPS2PIirr",
"MMX_CVTTPS2PIirr",
Expand Down Expand Up @@ -1416,9 +1415,7 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64",
"CRC32r(16|32|64)m8",
"FCOM32m",
def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m",
"FCOM64m",
"FCOMP32m",
"FCOMP64m")>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedSkylakeClient.td
Expand Up @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKLPort4]>;
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIDiv, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; // Integer division.
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;

def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedSkylakeServer.td
Expand Up @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKXPort4]>;
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIDiv, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; // Integer division.
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;

def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86Schedule.td
Expand Up @@ -110,6 +110,9 @@ defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.

// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;

// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
Expand Down
17 changes: 1 addition & 16 deletions llvm/lib/Target/X86/X86ScheduleBtVer2.td
Expand Up @@ -124,6 +124,7 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;

def : WriteRes<WriteIMulH, [JALU1]> {
let Latency = 6;
Expand Down Expand Up @@ -190,22 +191,6 @@ def JWriteIDiv32Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>;
def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>;

def JWriteCRC32 : SchedWriteRes<[JALU01]> {
let Latency = 3;
let ResourceCycles = [4];
let NumMicroOps = 3;
}
def : InstRW<[JWriteCRC32], (instrs CRC32r32r8, CRC32r32r16, CRC32r32r32,
CRC32r64r8, CRC32r64r64)>;

def JWriteCRC32Ld : SchedWriteRes<[JLAGU, JALU01]> {
let Latency = 6;
let ResourceCycles = [1, 4];
let NumMicroOps = 3;
}
def : InstRW<[JWriteCRC32Ld], (instrs CRC32r32m8, CRC32r32m16, CRC32r32m32,
CRC32r64m8, CRC32r64m64)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86ScheduleSLM.td
Expand Up @@ -90,7 +90,8 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;

// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Expand Up @@ -151,6 +151,7 @@ defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;

// Bit counts.
defm : ZnWriteResPair<WriteBitScan, [ZnALU], 3>;
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/sse42-schedule.ll
Expand Up @@ -141,7 +141,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; GENERIC-LABEL: crc32_32_16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00]
; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Expand All @@ -155,14 +155,14 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; SANDY-SSE-LABEL: crc32_32_16:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: crc32_32_16:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
Expand Down Expand Up @@ -260,7 +260,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; GENERIC-LABEL: crc32_32_32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Expand All @@ -274,14 +274,14 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; SANDY-SSE-LABEL: crc32_32_32:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: crc32_32_32:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
Expand Down

0 comments on commit 28e7bcb

Please sign in to comment.