diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc84025c166a3..ed193cc224bcf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     if (Subtarget.hasVBMI2()) {
       for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
-        setOperationAction(ISD::FSHL, VT, Custom);
-        setOperationAction(ISD::FSHR, VT, Custom);
+        setOperationAction(ISD::FSHL, VT, Legal);
+        setOperationAction(ISD::FSHR, VT, Legal);
       }
 
       setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
     for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
                     MVT::v4i64}) {
-      setOperationAction(ISD::FSHL, VT, Custom);
-      setOperationAction(ISD::FSHR, VT, Custom);
+      setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
+      setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
     }
   }
 
@@ -2703,6 +2703,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::STRICT_FP_EXTEND,
                        ISD::FP_ROUND,
                        ISD::STRICT_FP_ROUND,
+                       ISD::FSHL,
+                       ISD::FSHR,
                        ISD::INTRINSIC_VOID,
                        ISD::INTRINSIC_WO_CHAIN,
                        ISD::INTRINSIC_W_CHAIN});
@@ -31314,19 +31316,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
   bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
   unsigned NumElts = VT.getVectorNumElements();
 
-  if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
-
-    if (IsCstSplat) {
-      if (IsFSHR)
-        std::swap(Op0, Op1);
-      uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
-      SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
-      return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
-                           {Op0, Op1, Imm}, DAG, Subtarget);
-    }
+  // For non-VLX VBMI2 targets, widen 128/256-bit to 512-bit so
+  // the rest of the lowering/isel can select the VBMI2 forms.
+  // Only Custom types (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) can
+  // reach LowerFunnelShift with VBMI2 but no VLX, so no type check needed.
+  if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
     return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
                          {Op0, Op1, Amt}, DAG, Subtarget);
   }
+
   assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
           VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
           VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
@@ -57624,6 +57622,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Combiner: turn uniform-constant splat funnel shifts into VSHLD/VSHRD.
+static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const X86Subtarget &Subtarget) {
+  SDLoc DL(N);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Amt = N->getOperand(2);
+  EVT VT = Op0.getValueType();
+
+  if (!VT.isVector())
+    return SDValue();
+
+  // Only combine if the operation is legal for this type.
+  // This ensures we don't try to convert types that need to be
+  // widened/promoted.
+  if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
+    return SDValue();
+
+  unsigned EltSize = VT.getScalarSizeInBits();
+  APInt ShiftVal;
+  if (!X86::isConstantSplat(Amt, ShiftVal))
+    return SDValue();
+
+  uint64_t ModAmt = ShiftVal.urem(EltSize);
+  SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
+  bool IsFSHR = N->getOpcode() == ISD::FSHR;
+
+  if (IsFSHR)
+    std::swap(Op0, Op1);
+  unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
+  return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
+}
+
 static bool needCarryOrOverflowFlag(SDValue Flags) {
   assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
 
@@ -61228,6 +61260,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
+  case ISD::FSHL:
+  case ISD::FSHR:           return combineFunnelShift(N, DAG, DCI, Subtarget);
    // clang-format on
  }
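For reference, a minimal scalar sketch (not part of the patch; the helper name funnelShift16 and the sample values are made up) of the per-element ISD::FSHL/FSHR semantics that combineFunnelShift folds into a VSHLD/VSHRD immediate, including the urem reduction of the splat amount modulo the element width:

// Illustrative only: scalar model of funnel shifts on 16-bit elements,
// assuming the splat amount has already been extracted as a plain integer.
#include <cassert>
#include <cstdint>

static uint16_t funnelShift16(uint16_t Hi, uint16_t Lo, uint64_t Amt,
                              bool IsFSHR) {
  constexpr unsigned EltBits = 16;
  // Matches ShiftVal.urem(EltSize): the amount is reduced modulo the
  // element width before being emitted as the immediate.
  unsigned Mod = Amt % EltBits;
  // Hi:Lo form a double-width value; FSHL keeps the high EltBits of
  // (Hi:Lo << Mod), FSHR keeps the low EltBits of (Hi:Lo >> Mod).
  uint32_t Concat = (uint32_t(Hi) << EltBits) | Lo;
  return IsFSHR ? uint16_t(Concat >> Mod)
                : uint16_t(Concat >> (EltBits - Mod));
}

int main() {
  // fshl(0x00FF, 0xF000, 4) == 0x0FFF, fshr(0x00FF, 0xF000, 4) == 0xFF00.
  assert(funnelShift16(0x00FF, 0xF000, 4, /*IsFSHR=*/false) == 0x0FFF);
  assert(funnelShift16(0x00FF, 0xF000, 4, /*IsFSHR=*/true) == 0xFF00);
  return 0;
}

As in both the removed LowerFunnelShift path and the new combine, the FSHR case swaps Op0 and Op1 before building X86ISD::VSHRD, since the target node takes its source operands in the opposite order.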