Skip to content

Commit

Permalink
Reapply "[LV] Improve AnyOf reduction codegen. (#78304)"
Browse files Browse the repository at this point in the history
This reverts the revert commit 589c7ab.

This patch includes a fix for any-of reductions and epilogue
vectorization. Extra test coverage for the issue that caused the revert
has been added in 399ff08.

--------------------------------
Original commit message:

Update AnyOf reduction code generation to only keep track of the AnyOf
property in a boolean vector in the loop, only selecting either the new
or start value in the middle block.

The patch incorporates feedback from https://reviews.llvm.org/D153697.

This fixes #62565, as there are no longer multiple uses of the
start/new values.

Fixes #62565

PR: #78304
  • Loading branch information
fhahn committed Apr 5, 2024
1 parent 2650375 commit c6e38b9
Show file tree
Hide file tree
Showing 11 changed files with 425 additions and 310 deletions.
9 changes: 0 additions & 9 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,15 +372,6 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID);
/// Returns the comparison predicate used when expanding a min/max reduction.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);

/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
/// are trying to match. In this pattern, we are only ever selecting between two
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
/// loop invariant value. If any lane value in \p Left or \p Right is not equal
/// to \p StartVal, select the loop invariant value. This is done by selecting
/// \p Right iff \p Left is equal to \p StartVal.
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
Value *Left, Value *Right);

/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Expand Down
24 changes: 6 additions & 18 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1034,15 +1034,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
}
}

/// Emits one combining step of an any-of reduction: a select that yields
/// \p Left where \p Left differs from \p StartVal and \p Right otherwise.
/// If \p Left has a vector type, \p StartVal is first splatted to the same
/// element count so the comparison is performed per lane. \p RK is accepted
/// but not read by this function. Returns the created select.
Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
RecurKind RK, Value *Left, Value *Right) {
// Widen the start value so it can be compared against a vector operand.
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
// True for every lane of Left that is not equal to the start value.
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
// Keep Left where it differs from the start value; otherwise take Right.
return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
}

Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
Expand Down Expand Up @@ -1151,16 +1142,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
NewVal = SI->getTrueValue();
}

// Create a splat vector with the new value and compare this to the vector
// we want to reduce.
ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
Value *Right = Builder.CreateVectorSplat(EC, InitVal);
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");

// If any predicate is true it means that we want to select the new value.
Cmp = Builder.CreateOrReduce(Cmp);
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
Value *AnyOf =
Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
// The compares in the loop may yield poison, which propagates through the
// bitwise ORs. Freeze it here before the condition is used.
AnyOf = Builder.CreateFreeze(AnyOf);
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
}

Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ class VPBuilder {
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
VPBuilder(VPRecipeBase *InsertPt) {
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
}
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }

/// Clear the insertion point: created instructions will not be inserted into
/// a block.
Expand Down
76 changes: 62 additions & 14 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3055,9 +3055,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
}

// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal =
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
LoopScalarPreHeader->getTerminator()->getIterator());
PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
LoopScalarPreHeader->getFirstNonPHI());
// Copy original phi DL over to the new one.
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());

Expand Down Expand Up @@ -7460,7 +7459,6 @@ static void createAndCollectMergePhiForReduction(
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Value *FinalValue =
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
auto *ResumePhi =
Expand All @@ -7485,7 +7483,7 @@ static void createAndCollectMergePhiForReduction(
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
Incoming);
else
BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
}

auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
Expand Down Expand Up @@ -7778,11 +7776,10 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(

// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
LoopVectorPreHeader =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
LI, nullptr, "vec.epilog.ph");
LoopVectorPreHeader->setName("vec.epilog.ph");
BasicBlock *VecEpilogueIterationCountCheck =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
nullptr, "vec.epilog.iter.check", true);
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);

Expand Down Expand Up @@ -8901,6 +8898,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// A ComputeReductionResult recipe is added to the middle block, also for
// in-loop reductions which compute their result in-loop, because generating
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
//
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in any
// iteration. The final value is selected by the final ComputeReductionResult.
void LoopVectorizationPlanner::adjustRecipesForReductions(
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF) {
Expand Down Expand Up @@ -9074,6 +9075,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
// any iteration. The final value is selected by the final
// ComputeReductionResult.
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
return isa<VPWidenSelectRecipe>(U) ||
(isa<VPReplicateRecipe>(U) &&
cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
Instruction::Select);
}));
VPValue *Cmp = Select->getOperand(0);
// If the compare is checking the reduction PHI node, adjust it to check
// the start value.
if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
if (CmpR->getOperand(I) == PhiR)
CmpR->setOperand(I, PhiR->getStartValue());
}
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(Select);

// If the true value of the select is the reduction phi, the new value is
// selected if the negated condition is true in any iteration.
if (Select->getOperand(1) == PhiR)
Cmp = Builder.createNot(Cmp);
VPValue *Or = Builder.createOr(PhiR, Cmp);
Select->getVPSingleValue()->replaceAllUsesWith(Or);

// Convert the reduction phi to operate on bools.
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
OrigLoop->getHeader()->getContext())));
}

// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
Expand Down Expand Up @@ -9106,7 +9142,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
Expand Down Expand Up @@ -10198,9 +10236,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
ResumeV = ReductionResumeValues
.find(&ReductionPhi->getRecurrenceDescriptor())
->second;
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
IRBuilder<> Builder(
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
ResumeV = Builder.CreateICmpNE(ResumeV,
RdxDesc.getRecurrenceStartValue());
}
} else {
// Create induction resume values for both widened pointer and
// integer/fp inductions and update the start value of the induction
Expand Down
13 changes: 6 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
Op = Instruction::Or;

if (PhiR->isOrdered()) {
ReducedPartRdx = RdxParts[State.UF - 1];
Expand All @@ -513,19 +515,16 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
TrackingVH<Value> ReductionStartValue =
RdxDesc.getRecurrenceStartValue();
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
ReducedPartRdx, RdxPart);
} else
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
}

// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if (State.VF.isVector() && !PhiR->isInLoop()) {
if ((State.VF.isVector() ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
ReducedPartRdx =
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
Expand Down

0 comments on commit c6e38b9

Please sign in to comment.