Skip to content

Commit

Permalink
[X86] Custom isel floating point X86ISD::CMP on pre-CMOV targets. Eli…
Browse files Browse the repository at this point in the history
…minate ConvertCmpIfNecessary

If we don't have cmov, X87 compares write to FPSW and we need to
move the bits to EFLAGS to use as JCC/SETCC/CMOV conditions.

Previously this was done by calling ConvertCmpIfNecessary in
multiple places which would emit the extra code for the FNSTSW,
a shift, a truncate, and a SAHF instructions. Isel would then
select trunc+X86ISD::CMP to a FUCOM instruction that produces FPSW.

This patch centralizes all of the handling into a single custom
isel handler. This allows us to remove ConvertCmpIfNecessary and
a couple target specific ISD opcodes.

Differential Revision: https://reviews.llvm.org/D73863
  • Loading branch information
topperc committed Feb 6, 2020
1 parent 8d19af6 commit 4175d7e
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 76 deletions.
78 changes: 72 additions & 6 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5037,17 +5037,83 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}

case X86ISD::CMP: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
case X86ISD::CMP:
case X86ISD::STRICT_FCMP:
case X86ISD::STRICT_FCMPS: {
bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
Node->getOpcode() == X86ISD::STRICT_FCMPS;
SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);

// Save the original VT of the compare.
MVT CmpVT = N0.getSimpleValueType();

// Floating point needs special handling if we don't have FCOMI.
// FIXME: Should we have a X86ISD::FCMP to avoid mixing int and fp?
if (CmpVT.isFloatingPoint()) {
if (Subtarget->hasCMov())
break;

bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;

unsigned Opc;
switch (CmpVT.SimpleTy) {
default: llvm_unreachable("Unexpected type!");
case MVT::f32:
Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
break;
case MVT::f64:
Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
break;
case MVT::f80:
Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
break;
}

SDValue Cmp;
SDValue Chain =
IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
if (IsStrictCmp) {
SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other);
Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
Chain = Cmp.getValue(1);
} else {
Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0);
}

// Move FPSW to AX.
SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue());
Chain = FPSW;
SDValue FNSTSW =
SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW,
FPSW.getValue(1)),
0);

// Extract upper 8-bits of AX.
SDValue Extract =
CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);

// Move AH into flags.
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget->hasLAHFSAHF() &&
"Target doesn't support SAHF or FCOMI?");
SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
Chain = AH;
SDValue SAHF = SDValue(
CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);

if (IsStrictCmp)
ReplaceUses(SDValue(Node, 1), Chain);

ReplaceUses(SDValue(Node, 0), SAHF);
CurDAG->RemoveDeadNode(Node);
return;
}

// Optimizations for TEST compares.
if (!isNullConstant(N1))
break;

// Save the original VT of the compare.
MVT CmpVT = N0.getSimpleValueType();

// If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed
// by a test instruction. The test should be removed later by
// analyzeCompare if we are using only the zero flag.
Expand Down
39 changes: 1 addition & 38 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20998,36 +20998,6 @@ static std::pair<SDValue, SDValue> EmitCmp(SDValue Op0, SDValue Op1,
return std::make_pair(Sub.getValue(1), SDValue());
}

/// Convert a comparison if required by the subtarget.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
bool IsCmp = Cmp.getOpcode() == X86ISD::CMP;
bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP ||
Cmp.getOpcode() == X86ISD::STRICT_FCMPS;

if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) ||
!Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() ||
!Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint())
return Cmp;

// The instruction selector will select an FUCOM instruction instead of
// FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
// build an SDNode sequence that transfers the result from FPSW into EFLAGS:
// (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8))))
SDLoc dl(Cmp);
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);

// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}

/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
Expand Down Expand Up @@ -21991,7 +21961,6 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
SDValue EFLAGS = Tmp.first;
if (Chain)
Chain = Tmp.second;
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
}
Expand Down Expand Up @@ -22131,8 +22100,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
/// Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::SAHF)
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Expand Down Expand Up @@ -23085,7 +23053,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
Expand All @@ -23101,7 +23068,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// separate test.
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
Expand Down Expand Up @@ -23133,7 +23099,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG, Subtarget);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
Expand Down Expand Up @@ -29788,7 +29753,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(EH_RETURN)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
NODE_NAME_CASE(FNSTSW16r)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
Expand Down Expand Up @@ -29913,7 +29877,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
NODE_NAME_CASE(SAHF)
NODE_NAME_CASE(RDRAND)
NODE_NAME_CASE(RDSEED)
NODE_NAME_CASE(RDPKRU)
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,12 +541,6 @@ namespace llvm {
MEMBARRIER,
MFENCE,

// Store FP status word into i16 register.
FNSTSW16r,

// Store contents of %ah into %eflags.
SAHF,

// Get a random integer and indicate whether it is valid in CF.
RDRAND,

Expand Down Expand Up @@ -1494,9 +1488,6 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;

/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
Expand Down
27 changes: 9 additions & 18 deletions llvm/lib/Target/X86/X86InstrFPStack.td
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>]>;
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;

Expand All @@ -34,7 +33,6 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
Expand Down Expand Up @@ -638,19 +636,13 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW

// Floating point compares.
let SchedRW = [WriteFCom], Uses = [FPCW] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>;
def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86strict_fcmps RFP32:$lhs, RFP32:$rhs)))]>;
def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86strict_fcmps RFP64:$lhs, RFP64:$rhs)))]>;
def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86strict_fcmps RFP80:$lhs, RFP80:$rhs)))]>;
let SchedRW = [WriteFCom], Uses = [FPCW], hasSideEffects = 0 in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
} // SchedRW
} // mayRaiseFPException = 1

Expand Down Expand Up @@ -701,10 +693,9 @@ def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),

// Floating point flag ops.
let SchedRW = [WriteALU] in {
let Defs = [AX, FPSW], Uses = [FPSW] in
let Defs = [AX, FPSW], Uses = [FPSW], hasSideEffects = 0 in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
(outs), (ins), "fnstsw\t{%ax|ax}", []>;
let Defs = [FPSW], Uses = [FPCW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Target/X86/X86InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;

def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;

def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;

Expand Down Expand Up @@ -1787,9 +1785,8 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,

// Condition code ops, incl. set if equal/not equal/...
let SchedRW = [WriteLAHFSAHF] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))]>,
let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH
Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
Expand Down

0 comments on commit 4175d7e

Please sign in to comment.