Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 48 additions & 14 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

if (Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
setOperationAction(ISD::FSHL, VT, Legal);
setOperationAction(ISD::FSHR, VT, Legal);
}

setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
Expand All @@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
MVT::v4i64}) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
}
}

Expand Down Expand Up @@ -2703,6 +2703,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::STRICT_FP_EXTEND,
ISD::FP_ROUND,
ISD::STRICT_FP_ROUND,
ISD::FSHL,
ISD::FSHR,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_WO_CHAIN,
ISD::INTRINSIC_W_CHAIN});
Expand Down Expand Up @@ -31314,19 +31316,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
unsigned NumElts = VT.getVectorNumElements();

if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {

if (IsCstSplat) {
if (IsFSHR)
std::swap(Op0, Op1);
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
{Op0, Op1, Imm}, DAG, Subtarget);
}
// For non-VLX VBMI2 targets, widen 128/256-bit to 512-bit so
// the rest of the lowering/isel can select the VBMI2 forms.
// Only Custom types (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) can
// reach LowerFunnelShift with VBMI2 but no VLX, so no type check needed.
if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}

assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
Expand Down Expand Up @@ -57624,6 +57622,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

// Combiner: turn uniform-constant splat funnel shifts into VSHLD/VSHRD
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need the combine? Can we use pattern matching instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it is because in the combine we can easily check for constant splats and also type legality. I'm not sure how we'd do this with TableGen patterns. @RKSimon do you agree?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't currently easily match constant splats inside td patterns — it would be nice to have someday, but it's beyond the scope of this patch. We'd also hit the problem that the X86ISD::VSHRD node has an implicit operand swap that is going to be messy to remove.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phoebewang There is the option of moving this inside X86ISelDAGToDAG though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks!

// Fold vector FSHL/FSHR with a uniform constant shift amount into the
// X86-specific immediate funnel-shift nodes X86ISD::VSHLD / X86ISD::VSHRD
// (selected as the VBMI2 VPSHLD/VPSHRD instructions).
static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget &Subtarget) {
  SDLoc DL(N);
  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);
  SDValue ShAmt = N->getOperand(2);
  EVT VT = Lo.getValueType();

  // Scalar funnel shifts are handled elsewhere.
  if (!VT.isVector())
    return SDValue();

  // Bail out unless the funnel shift is directly selectable for this type.
  // This keeps us from converting types that still need to be
  // widened/promoted.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(N->getOpcode(), VT))
    return SDValue();

  // Only uniform (splat) constant amounts map onto the immediate forms.
  APInt SplatAmt;
  if (!X86::isConstantSplat(ShAmt, SplatAmt))
    return SDValue();

  // Funnel-shift amounts are taken modulo the element width.
  uint64_t ImmAmt = SplatAmt.urem(VT.getScalarSizeInBits());
  SDValue ImmOp = DAG.getTargetConstant(ImmAmt, DL, MVT::i8);
  bool IsFSHR = N->getOpcode() == ISD::FSHR;

  // X86ISD::VSHRD takes its source operands in the opposite order to FSHR.
  if (IsFSHR)
    std::swap(Lo, Hi);
  return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Lo, Hi,
                     ImmOp);
}

static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");

Expand Down Expand Up @@ -61228,6 +61260,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
case ISD::FSHL:
case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
// clang-format on
}

Expand Down