Skip to content

Commit

Permalink
Recommit "[VPlan] Replace disjoint or with add instead of dropping di…
Browse files Browse the repository at this point in the history
…sjoint. (#83821)"

Recommit with a fix for the use-after-free causing the revert.
This reverts the revert commit f872043.

Original commit message:

Dropping disjoint from an OR may yield incorrect results, as some
analysis may have converted it to an Add implicitly (e.g. SCEV used for
dependence analysis). Instead, replace it with an equivalent Add.

This is possible as all users of the disjoint OR only access lanes where
the operands are disjoint or poison otherwise.

Note that replacing all disjoint ORs with ADDs instead of dropping the
flags is not strictly necessary. It is only needed for disjoint ORs that
SCEV treated as ADDs, but those are not tracked.

There are other places that may drop poison-generating flags; those
likely need similar treatment.

Fixes #81872

PR: #83821
  • Loading branch information
fhahn committed Mar 27, 2024
1 parent ed68aac commit 6ef8299
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 2 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ class VPBuilder {
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
VPBuilder(VPRecipeBase *InsertPt) {
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
}

/// Clear the insertion point: created instructions will not be inserted into
/// a block.
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return WrapFlags.HasNSW;
}

bool isDisjoint() const {
assert(OpType == OperationType::DisjointOp &&
"recipe cannot have a disjoing flag");
return DisjointFlags.IsDisjoint;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printFlags(raw_ostream &O) const;
#endif
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}

template <typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
} // namespace VPlanPatternMatch
} // namespace llvm

Expand Down
19 changes: 18 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,24 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// load/store. If the underlying instruction has poison-generating flags,
// drop them directly.
if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
RecWithFlags->dropPoisonGeneratingFlags();
VPValue *A, *B;
using namespace llvm::VPlanPatternMatch;
// Dropping disjoint from an OR may yield incorrect results, as some
// analysis may have converted it to an Add implicitly (e.g. SCEV used
// for dependence analysis). Instead, replace it with an equivalent Add.
// This is possible as all users of the disjoint OR only access lanes
// where the operands are disjoint or poison otherwise.
if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
RecWithFlags->isDisjoint()) {
VPBuilder Builder(RecWithFlags);
VPInstruction *New = Builder.createOverflowingOp(
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
CurRec = New;
} else
RecWithFlags->dropPoisonGeneratingFlags();
} else {
Instruction *Instr = dyn_cast_or_null<Instruction>(
CurRec->getVPSingleValue()->getUnderlyingValue());
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
Expand Down

0 comments on commit 6ef8299

Please sign in to comment.