Skip to content

Commit

Permalink
[X86] Remove SDIVREM8_SEXT_HREG/UDIVREM8_ZEXT_HREG and their associat…
Browse files Browse the repository at this point in the history
…ed DAG combine and target bits support. Use a post isel peephole instead.

Summary:
These nodes exist to overcome an isel problem where we can generate a zero extend of an AH register followed by an extract subreg, and another zero extend. The first zero extend exists to avoid a partial register update copying the AH register into the low 8-bits. The second zero extend exists if the user wanted the remainder zero extended.

To make this work we had a DAG combine to morph the DIVREM opcode to a special opcode that included the extend. But then we had to add the new node to computeKnownBits and computeNumSignBits to process the extension portion.

This patch instead removes all of that and adds a late peephole to detect the two extends.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53449

llvm-svn: 344874
  • Loading branch information
topperc committed Oct 21, 2018
1 parent e367039 commit 5eea94e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 73 deletions.
72 changes: 54 additions & 18 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,8 @@ namespace {
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,
SDValue &InFlag);

bool tryOptimizeRem8Extend(SDNode *N);
};
}

Expand Down Expand Up @@ -841,22 +843,63 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
}

// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
unsigned Opc = N->getMachineOpcode();
if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
Opc != X86::MOVSX64rr8)
return false;

SDValue N0 = N->getOperand(0);

// We need to be extracting the lower bit of an extend.
if (!N0.isMachineOpcode() ||
N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
N0.getConstantOperandVal(1) != X86::sub_8bit)
return false;

// We're looking for either a movsx or movzx to match the original opcode.
unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
: X86::MOVSX32rr8_NOREX;
SDValue N00 = N0.getOperand(0);
if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
return false;

if (Opc == X86::MOVSX64rr8) {
// If we had a sign extend from 8 to 64 bits. We still need to go from 32
// to 64.
MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
MVT::i64, N00);
ReplaceUses(N, Extend);
} else {
// Ok we can drop this extend and just use the original extend.
ReplaceUses(N, N00.getNode());
}

return true;
}

void X86DAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;

// Attempt to remove vectors moves that were inserted to zero upper bits.

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;

if (tryOptimizeRem8Extend(N)) {
MadeChange = true;
continue;
}

// Attempt to remove vectors moves that were inserted to zero upper bits.

if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
continue;

Expand Down Expand Up @@ -905,11 +948,11 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
// Producing instruction is another vector instruction. We can drop the
// move.
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));

// If the move is now dead, delete it.
if (Move.getNode()->use_empty())
CurDAG->RemoveDeadNode(Move.getNode());
MadeChange = true;
}

if (MadeChange)
CurDAG->RemoveDeadNodes();
}


Expand Down Expand Up @@ -3370,15 +3413,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}

case ISD::SDIVREM:
case ISD::UDIVREM:
case X86ISD::SDIVREM8_SEXT_HREG:
case X86ISD::UDIVREM8_ZEXT_HREG: {
case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);

unsigned Opc, MOpc;
bool isSigned = (Opcode == ISD::SDIVREM ||
Opcode == X86ISD::SDIVREM8_SEXT_HREG);
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
Expand Down Expand Up @@ -3517,13 +3557,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Result(RNode, 0);
InFlag = SDValue(RNode, 1);

if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
} else {
Result =
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
}
Result =
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);

ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
Expand Down
51 changes: 0 additions & 51 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26638,8 +26638,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::SMUL8: return "X86ISD::SMUL8";
case X86ISD::UMUL8: return "X86ISD::UMUL8";
case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
Expand Down Expand Up @@ -29583,13 +29581,6 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero &= Known2.Zero;
break;
}
case X86ISD::UDIVREM8_ZEXT_HREG:
// TODO: Support more than just the zero extended bits?
if (Op.getResNo() != 1)
break;
// The remainder is zero extended.
Known.Zero.setBitsFrom(8);
break;
}

// Handle target shuffles.
Expand Down Expand Up @@ -29720,12 +29711,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
return std::min(Tmp0, Tmp1);
}
case X86ISD::SDIVREM8_SEXT_HREG:
// TODO: Support more than just the sign extended bits?
if (Op.getResNo() != 1)
break;
// The remainder is sign extended.
return VTBits - 7;
}

// Fallback case.
Expand Down Expand Up @@ -38242,36 +38227,6 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}

/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
/// extends from AH (which we otherwise need to do contortions to access).
static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
auto OpcodeN = N->getOpcode();
auto OpcodeN0 = N0.getOpcode();
if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
(OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
return SDValue();

EVT VT = N->getValueType(0);
EVT InVT = N0.getValueType();
if (N0.getResNo() != 1 || InVT != MVT::i8 ||
!(VT == MVT::i32 || VT == MVT::i64))
return SDValue();

SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32);
auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
: X86ISD::UDIVREM8_ZEXT_HREG;
SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
N0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
// If this was a 64-bit extend, complete it.
if (VT == MVT::i64)
return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1));
return R.getValue(1);
}

// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
// operands and the result of CMOV is not used anywhere else - promote CMOV
// itself instead of promoting its result. This could be beneficial, because:
Expand Down Expand Up @@ -38572,9 +38527,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);

if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;

if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;

Expand Down Expand Up @@ -38775,9 +38727,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
return R;

if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;

if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;

Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,6 @@ namespace llvm {
// 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
SMUL8, UMUL8,

// 8-bit divrem that zero-extend the high result (AH).
UDIVREM8_ZEXT_HREG,
SDIVREM8_SEXT_HREG,

// X86-specific multiply by immediate.
MUL_IMM,

Expand Down

0 comments on commit 5eea94e

Please sign in to comment.