Skip to content

Commit

Permalink
[X86] Add variable shuffle schedule classes
Browse files Browse the repository at this point in the history
Split variable index shuffles from immediate index shuffles

WriteFVarShuffle - floating-point variable 'in-lane' shuffles (VPERMILPS/VPERMIL2PS etc.)
WriteVarShuffle - integer variable 'in-lane' shuffles (PSHUFB/VPPERM etc.)

WriteFVarShuffle256 - floating-point variable 'cross-lane' shuffles (VPERMPS etc.)
WriteVarShuffle256 - integer variable 'cross-lane' shuffles (VPERMD etc.)

Differential Revision: https://reviews.llvm.org/D45404

llvm-svn: 329806
  • Loading branch information
RKSimon committed Apr 11, 2018
1 parent 7bcb572 commit 89c8a10
Show file tree
Hide file tree
Showing 17 changed files with 117 additions and 181 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -1725,12 +1725,12 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --

let Sched = WriteFShuffle256 in
let Sched = WriteFVarShuffle256 in
def AVX512_PERM2_F : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;

let Sched = WriteShuffle256 in
let Sched = WriteVarShuffle256 in
def AVX512_PERM2_I : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
Expand Down Expand Up @@ -8969,7 +8969,7 @@ let Predicates = [HasDQI, NoBWI] in {
//

// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
let Sched = WriteShuffle256 in {
let Sched = WriteVarShuffle256 in {
def AVX512_COMPRESS : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86InstrMMX.td
Expand Up @@ -74,11 +74,12 @@ def MMX_UNPCK_L_ITINS : OpndItins<
def MMX_PCK_ITINS : OpndItins<
IIC_MMX_PCK_RR, IIC_MMX_PCK_RM
>;
} // Sched

let Sched = WriteVarShuffle in
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
} // Sched

let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -5050,7 +5050,7 @@ def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
}
let Sched = WriteShuffle in
let Sched = WriteVarShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
Expand Down Expand Up @@ -7688,7 +7688,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
// VPERMIL - Permute Single and Double Floating-Point Values
//

let Sched = WriteFShuffle in
let Sched = WriteFVarShuffle in
def AVX_VPERMILV : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
Expand All @@ -7707,13 +7707,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
Sched<[WriteFShuffle]>;
Sched<[WriteFVarShuffle]>;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
(i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
Sched<[WriteFVarShuffleLd, ReadAfterLd]>;

def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
Expand Down Expand Up @@ -8181,10 +8181,10 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
}

defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256,
defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
i256mem>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256,
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
f256mem>;

multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
Expand Down
16 changes: 8 additions & 8 deletions llvm/lib/Target/X86/X86InstrXOP.td
Expand Up @@ -279,23 +279,23 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[WriteShuffle]>;
XOP_4V, Sched<[WriteVarShuffle]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>;
XOP_4V, VEX_W, Sched<[WriteVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd,
XOP_4V, Sched<[WriteVarShuffleLd, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
Expand All @@ -307,7 +307,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[WriteShuffle]>, FoldGenData<NAME#rrr>;
[]>, XOP_4V, VEX_W, Sched<[WriteVarShuffle]>, FoldGenData<NAME#rrr>;
}

let ExeDomain = SSEPackedInt in {
Expand Down Expand Up @@ -367,7 +367,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[WriteFShuffle]>;
Sched<[WriteFVarShuffle]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
Expand All @@ -376,15 +376,15 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[WriteFShuffleLd, ReadAfterLd, ReadAfterLd]>;
Sched<[WriteFVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[WriteFShuffleLd, ReadAfterLd,
Sched<[WriteFVarShuffleLd, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
Expand All @@ -395,7 +395,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[WriteFShuffle]>, FoldGenData<NAME#rr>;
[]>, VEX_W, Sched<[WriteFVarShuffle]>, FoldGenData<NAME#rr>;
}

let ExeDomain = SSEPackedDouble in {
Expand Down
10 changes: 4 additions & 6 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Expand Up @@ -162,6 +162,7 @@ defm : BWWriteResPair<WriteFRcp, [BWPort0], 5>; // Floating point reciprocal e
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate.
defm : BWWriteResPair<WriteFMA, [BWPort01], 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2]>; // Fp vector variable blends.

Expand All @@ -178,6 +179,7 @@ defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort15], 1>; // Vector blends.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2]>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 6, [1, 2]>; // Vector MPSAD.
Expand Down Expand Up @@ -288,7 +290,9 @@ def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def Writ

// AVX2.
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.

// Old microcoded instructions that nobody uses.
Expand Down Expand Up @@ -366,7 +370,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
"MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
Expand Down Expand Up @@ -404,9 +407,7 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
"VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
"VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
Expand All @@ -419,7 +420,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
"(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
Expand Down Expand Up @@ -891,9 +891,7 @@ def: InstRW<[BWWriteResGroup28], (instregex "VBROADCASTSDYrr",
"VPBROADCASTW(Y?)rr",
"VPERM2F128rr",
"VPERM2I128rr",
"VPERMDYrr",
"VPERMPDYri",
"VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
Expand Down
10 changes: 4 additions & 6 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Expand Up @@ -159,8 +159,10 @@ defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2]>;

// Vector integer operations.
Expand All @@ -174,8 +176,10 @@ defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteBlend, [HWPort15], 1>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2]>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 6, [1, 2]>;
Expand Down Expand Up @@ -724,7 +728,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
"MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
Expand Down Expand Up @@ -762,9 +765,7 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
"VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
"VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
Expand All @@ -777,7 +778,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
"(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
Expand Down Expand Up @@ -1780,9 +1780,7 @@ def: InstRW<[HWWriteResGroup51], (instregex "VBROADCASTSDYrr",
"VPBROADCASTWrr",
"VPERM2F128rr",
"VPERM2I128rr",
"VPERMDYrr",
"VPERMPDYri",
"VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86SchedSandyBridge.td
Expand Up @@ -148,6 +148,7 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort0, SBPort5], 2>;

Expand All @@ -162,6 +163,7 @@ defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1>;
defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 5, [1,2], 3>;
Expand Down Expand Up @@ -275,7 +277,9 @@ def : WriteRes<WriteNop, []>;
// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;

Expand Down Expand Up @@ -352,9 +356,7 @@ def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP",
"(V?)ORPS(Y?)rr",
"VPERM2F128rr",
"VPERMILPD(Y?)ri",
"VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
"VPERMILPS(Y?)rr",
"(V?)SHUFPD(Y?)rri",
"(V?)SHUFPS(Y?)rri",
"(V?)UNPCKHPD(Y?)rr",
Expand Down Expand Up @@ -408,7 +410,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr",
"MMX_PABSWrr",
"MMX_PADDQirr",
"MMX_PALIGNRrri",
"MMX_PSHUFBrr",
"MMX_PSIGNBrr",
"MMX_PSIGNDrr",
"MMX_PSIGNWrr",
Expand Down Expand Up @@ -462,7 +463,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
"(V?)PSHUFBrr",
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
Expand Down

0 comments on commit 89c8a10

Please sign in to comment.