Skip to content

Commit

Permalink
[VPlan] Add VPWidenCastRecipe, split off from VPWidenRecipe (NFCI).
Browse files Browse the repository at this point in the history
To generate cast instructions, the result type is needed. To allow
creating widened casts without an underlying instruction, introduce a new
VPWidenCastRecipe that also holds the result type.

This functionality will be used in a follow-up patch to
implement truncateToMinimalBitwidths as a VPlan-to-VPlan transform.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D149081
  • Loading branch information
fhahn committed May 5, 2023
1 parent 95bb95e commit e3afe0b
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 47 deletions.
17 changes: 5 additions & 12 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8509,33 +8509,21 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
case Instruction::Add:
case Instruction::And:
case Instruction::AShr:
case Instruction::BitCast:
case Instruction::FAdd:
case Instruction::FCmp:
case Instruction::FDiv:
case Instruction::FMul:
case Instruction::FNeg:
case Instruction::FPExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::FPTrunc:
case Instruction::FRem:
case Instruction::FSub:
case Instruction::ICmp:
case Instruction::IntToPtr:
case Instruction::LShr:
case Instruction::Mul:
case Instruction::Or:
case Instruction::PtrToInt:
case Instruction::Select:
case Instruction::SExt:
case Instruction::Shl:
case Instruction::SIToFP:
case Instruction::Sub:
case Instruction::Trunc:
case Instruction::UIToFP:
case Instruction::Xor:
case Instruction::ZExt:
case Instruction::Freeze:
return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
};
Expand Down Expand Up @@ -8688,6 +8676,11 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
*SI, make_range(Operands.begin(), Operands.end())));
}

if (auto *CI = dyn_cast<CastInst>(Instr)) {
return toVPRecipeResult(
new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
}

return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
}

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,19 @@ void VPTransformState::addNewMetadata(Instruction *To,
}

/// Transfer metadata from \p From to \p To by calling propagateMetadata and
/// then addNewMetadata. A null \p From means there is no source instruction,
/// in which case \p To is left untouched.
void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
  if (From) {
    propagateMetadata(To, From);
    addNewMetadata(To, From);
  }
}

void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
// No source instruction to transfer metadata from?
if (!From)
return;

for (Value *V : To) {
if (Instruction *I = dyn_cast<Instruction>(V))
addMetadata(I, From);
Expand Down
38 changes: 38 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,44 @@ class VPWidenRecipe : public VPRecipeBase, public VPValue {
#endif
};

/// A recipe that produces vector cast instructions. The cast opcode and the
/// result type are stored in the recipe itself, so an underlying CastInst is
/// optional.
class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
  /// Opcode of the cast to emit.
  Instruction::CastOps Opcode;

  /// Type the cast converts its operand to.
  Type *ResultTy;

public:
  /// Build a cast recipe applying \p Opcode to \p Op with result type
  /// \p ResultTy. \p UI is the optional underlying cast instruction; when it
  /// is provided, its opcode and type must agree with \p Opcode and
  /// \p ResultTy (checked by assertions).
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst *UI = nullptr)
      : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
        Opcode(Opcode), ResultTy(ResultTy) {
    assert((!UI || UI->getOpcode() == Opcode) &&
           "opcode of underlying cast doesn't match");
    assert((!UI || UI->getType() == ResultTy) &&
           "result type of underlying cast doesn't match");
  }

  ~VPWidenCastRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Returns the opcode of the cast.
  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }

  /// Generate the widened cast.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
/// ID of the vector intrinsic to call when widening the call. If set the
Expand Down
59 changes: 31 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBlendSC:
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
Expand Down Expand Up @@ -90,6 +91,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPBlendSC:
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
Expand Down Expand Up @@ -128,6 +130,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPReductionSC:
case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
Expand Down Expand Up @@ -684,34 +687,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {

break;
}

case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto *CI = cast<CastInst>(&I);
State.setDebugLocFromInst(CI);

/// Vectorize casts.
assert(State.VF.isVector() && "not widening");
Type *DestTy = VectorType::get(CI->getType(), State.VF);

for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
State.set(this, Cast, Part);
State.addMetadata(Cast, &I);
}
break;
}
default:
// This instruction is not vectorized by simple widening.
LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
Expand All @@ -729,6 +704,34 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
O << Cmp->getPredicate() << " ";
printOperands(O, SlotTracker);
}
#endif

/// Emit one vector cast for each of the State.UF parts, converting the
/// recipe's single operand to a vector of the result type with State.VF
/// elements. The underlying instruction, if any, supplies the debug location
/// and the metadata source.
void VPWidenCastRecipe::execute(VPTransformState &State) {
  // The recipe may have been created without an underlying instruction, so
  // the source instruction can legitimately be null here.
  Instruction *Underlying = cast_or_null<Instruction>(getUnderlyingValue());
  if (Underlying)
    State.setDebugLocFromInst(Underlying);

  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *WideTy = VectorType::get(getResultType(), State.VF);

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Src = State.get(getOperand(0), Part);
    Value *Widened = State.Builder.CreateCast(getOpcode(), Src, WideTy);
    State.set(this, Widened, Part);
    State.addMetadata(Widened, Underlying);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe as
///   WIDEN-CAST <result> = <opcode-name> <operands> to <result-type>
/// prefixed by \p Indent.
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);

  O << " = ";
  O << Instruction::getOpcodeName(getOpcode());
  O << " ";
  printOperands(O, SlotTracker);

  O << " to ";
  O << *getResultType();
}

void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
NewRecipe =
new VPWidenSelectRecipe(*SI, Plan->mapToVPValues(SI->operands()));
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
NewRecipe = new VPWidenCastRecipe(
CI->getOpcode(), Plan->getVPValueOrAddLiveIn(CI->getOperand(0)),
CI->getType(), CI);
} else {
NewRecipe =
new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ class VPDef {
VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
VPWidenGEPSC,
VPWidenMemoryInstructionSC,
VPWidenSC,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64)
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>
Expand All @@ -46,7 +46,7 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic)
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED1]]>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
Expand Down Expand Up @@ -112,7 +112,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
Expand Down Expand Up @@ -182,7 +182,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.srem
; CHECK-EMPTY:
Expand Down Expand Up @@ -275,7 +275,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED]]>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
Expand Down Expand Up @@ -357,7 +357,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
Expand Down

0 comments on commit e3afe0b

Please sign in to comment.