Skip to content

Commit

Permalink
AMDGPU: Remove pointless conversions
Browse files Browse the repository at this point in the history
llvm-svn: 270139
  • Loading branch information
arsenm committed May 19, 2016
1 parent 847afa2 commit 4e3d383
Showing 1 changed file with 10 additions and 30 deletions.
40 changes: 10 additions & 30 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,28 +1176,23 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
// This is a shortcut for integer division because we have fast i32<->f32
// conversions, and fast f32 reciprocal instructions. The fractional part of a
// float is enough to accurately represent up to a 24-bit integer.
SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool Sign) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
MVT IntVT = MVT::i32;
MVT FltVT = MVT::f32;

ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;

if (VT.isVector()) {
unsigned NElts = VT.getVectorNumElements();
IntVT = MVT::getVectorVT(MVT::i32, NElts);
FltVT = MVT::getVectorVT(MVT::f32, NElts);
}

unsigned BitSize = VT.getScalarType().getSizeInBits();
unsigned BitSize = VT.getSizeInBits();

SDValue jq = DAG.getConstant(1, DL, IntVT);

if (sign) {
if (Sign) {
// char|short jq = ia ^ ib;
jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);

Expand All @@ -1207,27 +1202,20 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool

// jq = jq | 0x1
jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));

// jq = (int)jq
jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
}

// int ia = (int)LHS;
SDValue ia = sign ?
DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
SDValue ia = LHS;

// int ib = (int)RHS;
SDValue ib = sign ?
DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
SDValue ib = RHS;

// float fa = (float)ia;
SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);

// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);

// TODO: Should this propagate fast-math-flags?
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));

Expand All @@ -1238,8 +1226,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);

// float fr = mad(fqneg, fb, fa);
SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa);

// int iq = (int)fq;
SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
Expand All @@ -1258,21 +1245,14 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
// jq = (cv ? jq : 0);
jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));

// dst = trunc/extend to legal type
iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);

// dst = iq + jq;
SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);

// Rem needs compensation, it's easier to recompute it
SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);

SDValue Res[2] = {
Div,
Rem
};
return DAG.getMergeValues(Res, DL);
return DAG.getMergeValues({ Div, Rem }, DL);
}

void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
Expand Down

0 comments on commit 4e3d383

Please sign in to comment.