Skip to content

Commit

Permalink
[X86] Split X87/SSE compare classes into WriteFCom + WriteFComX
Browse files Browse the repository at this point in the history
Most X87 compare instructions write to the X87 status word, while the SSE (U)COMI compares write to rFLAGS. These are often handled very differently on CPUs (e.g. rFLAGS outputs typically involve a fpu2gpr transfer), and we shouldn't be grouping all these instructions behind a single class - so this patch splits off the SSE compares into a new WriteFComX class (and currently keeps the same behaviours). If there's a need to distinguish between X87 instructions more closely we can investigate that in the future, but as we don't handle any of the X87 side effects at the moment, it's unlikely to have any notable effect.
  • Loading branch information
RKSimon committed Jan 17, 2020
1 parent 1dc2f25 commit 8eb4d25
Show file tree
Hide file tree
Showing 14 changed files with 19 additions and 7 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -8552,7 +8552,7 @@ let Predicates = [HasVLX] in {
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, Domain d,
X86FoldableSchedWrite sched = WriteFCom> {
X86FoldableSchedWrite sched = WriteFComX> {
let hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -1830,7 +1830,7 @@ let Constraints = "$src1 = $dst" in {
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, X86MemOperand x86memop,
PatFrag ld_frag, string OpcodeStr, Domain d,
X86FoldableSchedWrite sched = WriteFCom> {
X86FoldableSchedWrite sched = WriteFComX> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
ExeDomain = d in {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
Expand All @@ -1851,7 +1851,7 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr,
Domain d,
X86FoldableSchedWrite sched = WriteFCom> {
X86FoldableSchedWrite sched = WriteFComX> {
let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86SchedBroadwell.td
Expand Up @@ -260,7 +260,8 @@ defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point
defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags (X87).
defm : BWWriteResPair<WriteFComX, [BWPort1], 3>; // Floating point compare to flags (SSE).

defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Expand Up @@ -261,6 +261,7 @@ defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmp64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1

defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFComX, [HWPort1], 3>;

defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86SchedSandyBridge.td
Expand Up @@ -238,6 +238,7 @@ defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFComX, [SBPort1], 3>;

defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86SchedSkylakeClient.td
Expand Up @@ -255,7 +255,8 @@ defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags (X87).
defm : SKLWriteResPair<WriteFComX, [SKLPort0], 2>; // Floating point compare to flags (SSE).

defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86SchedSkylakeServer.td
Expand Up @@ -255,7 +255,8 @@ defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).

defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86Schedule.td
Expand Up @@ -250,7 +250,8 @@ defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double
defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (X87).
defm WriteFComX : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (SSE).
defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleAtom.td
Expand Up @@ -244,6 +244,7 @@ defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7,
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleBdVer2.td
Expand Up @@ -766,6 +766,7 @@ defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
let Latency = 6;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleBtVer2.td
Expand Up @@ -541,6 +541,7 @@ defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFComX, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleSLM.td
Expand Up @@ -214,6 +214,7 @@ defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFComX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Expand Up @@ -295,6 +295,7 @@ defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ScheduleZnver2.td
Expand Up @@ -281,6 +281,7 @@ defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFComX, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFBlend, [Zn2FPU01], 1>;
defm : Zn2WriteResFpuPair<WriteFBlendY, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
Expand Down

0 comments on commit 8eb4d25

Please sign in to comment.