diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 71d494d4797fc..660b318383196 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8509,33 +8509,21 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I, case Instruction::Add: case Instruction::And: case Instruction::AShr: - case Instruction::BitCast: case Instruction::FAdd: case Instruction::FCmp: case Instruction::FDiv: case Instruction::FMul: case Instruction::FNeg: - case Instruction::FPExt: - case Instruction::FPToSI: - case Instruction::FPToUI: - case Instruction::FPTrunc: case Instruction::FRem: case Instruction::FSub: case Instruction::ICmp: - case Instruction::IntToPtr: case Instruction::LShr: case Instruction::Mul: case Instruction::Or: - case Instruction::PtrToInt: case Instruction::Select: - case Instruction::SExt: case Instruction::Shl: - case Instruction::SIToFP: case Instruction::Sub: - case Instruction::Trunc: - case Instruction::UIToFP: case Instruction::Xor: - case Instruction::ZExt: case Instruction::Freeze: return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end())); }; @@ -8688,6 +8676,11 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, *SI, make_range(Operands.begin(), Operands.end()))); } + if (auto *CI = dyn_cast<CastInst>(Instr)) { + return toVPRecipeResult( + new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI)); + } + return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 23646901b396d..48a61fece983c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -247,11 +247,19 @@ void VPTransformState::addNewMetadata(Instruction *To, } void VPTransformState::addMetadata(Instruction *To, Instruction *From) { + // No source instruction to transfer metadata from? 
+ if (!From) + return; + propagateMetadata(To, From); addNewMetadata(To, From); } void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) { + // No source instruction to transfer metadata from? + if (!From) + return; + for (Value *V : To) { if (Instruction *I = dyn_cast<Instruction>(V)) addMetadata(I, From); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0360066daa238..eb6b9b1e2efb6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -959,6 +959,44 @@ class VPWidenRecipe : public VPRecipeBase, public VPValue { #endif }; +/// VPWidenCastRecipe is a recipe to create vector cast instructions. +class VPWidenCastRecipe : public VPRecipeBase, public VPValue { + /// Cast instruction opcode. + Instruction::CastOps Opcode; + + /// Result type for the cast. + Type *ResultTy; + +public: + VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, + CastInst *UI = nullptr) + : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI), + Opcode(Opcode), ResultTy(ResultTy) { + assert((!UI || UI->getOpcode() == Opcode) && + "opcode of underlying cast doesn't match"); + assert((!UI || UI->getType() == ResultTy) && + "result type of underlying cast doesn't match"); + } + + ~VPWidenCastRecipe() override = default; + + VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) + + /// Produce widened copies of the cast. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + Instruction::CastOps getOpcode() const { return Opcode; } + + /// Returns the result type of the cast. + Type *getResultType() const { return ResultTy; } +}; + /// A recipe for widening Call instructions. class VPWidenCallRecipe : public VPRecipeBase, public VPValue { /// ID of the vector intrinsic to call when widening the call. 
If set the diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 4f3e527efcf90..77031a92a5dcf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPBlendSC: case VPReductionSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenPHISC: @@ -90,6 +91,7 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPBlendSC: case VPReductionSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenPHISC: @@ -128,6 +130,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPReductionSC: case VPScalarIVStepsSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenPHISC: @@ -684,34 +687,6 @@ void VPWidenRecipe::execute(VPTransformState &State) { break; } - - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = cast<CastInst>(&I); - State.setDebugLocFromInst(CI); - - /// Vectorize casts. - assert(State.VF.isVector() && "not widening"); - Type *DestTy = VectorType::get(CI->getType(), State.VF); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *A = State.get(getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - State.set(this, Cast, Part); - State.addMetadata(Cast, &I); - } - break; - } default: // This instruction is not vectorized by simple widening. 
LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); @@ -729,6 +704,34 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, O << Cmp->getPredicate() << " "; printOperands(O, SlotTracker); } +#endif + +void VPWidenCastRecipe::execute(VPTransformState &State) { + auto *I = cast_or_null<Instruction>(getUnderlyingValue()); + if (I) + State.setDebugLocFromInst(I); + auto &Builder = State.Builder; + /// Vectorize casts. + assert(State.VF.isVector() && "Not vectorizing?"); + Type *DestTy = VectorType::get(getResultType(), State.VF); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *Cast = Builder.CreateCast(Opcode, A, DestTy); + State.set(this, Cast, Part); + State.addMetadata(Cast, I); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-CAST "; + printAsOperand(O, SlotTracker); + O << " = " << Instruction::getOpcodeName(Opcode) << " "; + printOperands(O, SlotTracker); + O << " to " << *getResultType(); +} void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6f0dfccb682cb..c7a447b961464 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -79,6 +79,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes( } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { NewRecipe = new VPWidenSelectRecipe(*SI, Plan->mapToVPValues(SI->operands())); + } else if (auto *CI = dyn_cast<CastInst>(Inst)) { + NewRecipe = new VPWidenCastRecipe( + CI->getOpcode(), Plan->getVPValueOrAddLiveIn(CI->getOperand(0)), + CI->getType(), CI); } else { NewRecipe = new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index c1a30133a0f8d..ac110bb3b0ef9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -345,6 +345,7 @@ class VPDef { VPScalarIVStepsSC, VPWidenCallSC, VPWidenCanonicalIVSC, + VPWidenCastSC, VPWidenGEPSC, VPWidenMemoryInstructionSC, VPWidenSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index df2e52349eea9..503310c3f63f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -19,7 +19,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src> -; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l> +; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> @@ -46,7 +46,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src> -; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l> +; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> diff --git 
a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 47733956e1526..15396c0825f02 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -42,7 +42,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED1]]> +; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -112,7 +112,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -182,7 +182,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]> -; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: @@ -275,7 +275,7 @@ define void 
@sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED]]> +; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -357,7 +357,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: