Skip to content

Commit

Permalink
[LSV] Fix the ContextInst for computeKnownBits.
Browse files Browse the repository at this point in the history
Previously we used the later of GEPA or GEPB.  This is hacky because
really we should be using the later of the two load/store instructions
being considered.  But also it's flat-out incorrect, because GEPA and
GEPB might be in different BBs, in which case we cannot ask which one
comes last (assertion failure,
https://reviews.llvm.org/D149893#4378332).

Fixed, now we use the correct context instruction.

Differential Revision: https://reviews.llvm.org/D151630
  • Loading branch information
jlebar committed May 28, 2023
1 parent 5b4fed6 commit f225471
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 21 deletions.
51 changes: 30 additions & 21 deletions llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Expand Up @@ -296,10 +296,13 @@ class Vectorizer {

/// Tries to compute the offset in bytes PtrB - PtrA.
std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
Instruction *ContextInst,
unsigned Depth = 0);
std::optional<APInt> gtConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
unsigned Depth);
std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
Instruction *ContextInst,
unsigned Depth);
std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
Instruction *ContextInst,
unsigned Depth);

/// Gets the element type of the vector that the chain will load or store.
Expand Down Expand Up @@ -1160,15 +1163,15 @@ static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
return false;
}

std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
Value *PtrB,
unsigned Depth) {
LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs PtrA=" << *PtrA
<< " PtrB=" << *PtrB << " Depth=" << Depth << "\n");
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
<< " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
<< " Depth=" << Depth << "\n");
auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
if (!GEPA || !GEPB)
return getConstantOffsetSelects(PtrA, PtrB, Depth);
return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

// Look through GEPs after checking they're the same except for the last
// index.
Expand Down Expand Up @@ -1215,7 +1218,7 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
return std::nullopt;
APInt IdxDiff = *IdxDiffRange.getSingleElement();

LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
<< "\n");

// Now we need to prove that adding IdxDiff to ValA won't overflow.
Expand Down Expand Up @@ -1257,7 +1260,6 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
if (!Safe) {
// When computing known bits, use the GEPs as context instructions, since
// they likely are in the same BB as the load/store.
Instruction *ContextInst = GEPA->comesBefore(GEPB) ? GEPB : GEPA;
KnownBits Known(BitWidth);
computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
ContextInst, &DT);
Expand All @@ -1274,8 +1276,8 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
return std::nullopt;
}

std::optional<APInt>
Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
if (Depth++ == MaxDepth)
return std::nullopt;

Expand All @@ -1284,13 +1286,15 @@ Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
if (SelectA->getCondition() != SelectB->getCondition())
return std::nullopt;
LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
<< ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
<< ", PtrB=" << *PtrB << ", ContextInst="
<< *ContextInst << ", Depth=" << Depth << "\n");
std::optional<APInt> TrueDiff = getConstantOffset(
SelectA->getTrueValue(), SelectB->getTrueValue(), Depth);
SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
if (!TrueDiff.has_value())
return std::nullopt;
std::optional<APInt> FalseDiff = getConstantOffset(
SelectA->getFalseValue(), SelectB->getFalseValue(), Depth);
std::optional<APInt> FalseDiff =
getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
ContextInst, Depth);
if (TrueDiff == FalseDiff)
return TrueDiff;
}
Expand Down Expand Up @@ -1429,9 +1433,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
auto ChainIter = MRU.begin();
for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
++J, ++ChainIter) {
std::optional<APInt> Offset =
getConstantOffset(getLoadStorePointerOperand(ChainIter->first),
getLoadStorePointerOperand(I));
std::optional<APInt> Offset = getConstantOffset(
getLoadStorePointerOperand(ChainIter->first),
getLoadStorePointerOperand(I),
/*ContextInst=*/
(ChainIter->first->comesBefore(I) ? I : ChainIter->first));
if (Offset.has_value()) {
// `Offset` might not have the expected number of bits, if e.g. AS has a
// different number of bits than opaque pointers.
Expand Down Expand Up @@ -1464,9 +1470,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
}

std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
Instruction *ContextInst,
unsigned Depth) {
LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
<< ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
<< ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
<< ", Depth=" << Depth << "\n");
unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
APInt OffsetA(OffsetBitWidth, 0);
APInt OffsetB(OffsetBitWidth, 0);
Expand Down Expand Up @@ -1495,7 +1503,8 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
if (DistRange.isSingleElement())
return OffsetB - OffsetA + *DistRange.getSingleElement();
}
std::optional<APInt> Diff = gtConstantOffsetComplexAddrs(PtrA, PtrB, Depth);
std::optional<APInt> Diff =
getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
if (Diff.has_value())
return OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth());
return std::nullopt;
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
Expand Up @@ -118,3 +118,22 @@ define void @sexted_i1_gep_index(ptr addrspace(1) %p, i32 %val) {
%val1 = load i32, ptr addrspace(1) %gep.1
ret void
}

; CHECK-LABEL: @zexted_i1_gep_index_different_bbs
; CHECK: load i32
; CHECK: load i32
define void @zexted_i1_gep_index_different_bbs(ptr addrspace(1) %p, i32 %val) {
entry:
%selector = icmp eq i32 %val, 0
%flipped = xor i1 %selector, 1
%index.0 = zext i1 %selector to i64
%index.1 = zext i1 %flipped to i64
%gep.0 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.0
br label %next

next:
%gep.1 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.1
%val0 = load i32, ptr addrspace(1) %gep.0
%val1 = load i32, ptr addrspace(1) %gep.1
ret void
}

0 comments on commit f225471

Please sign in to comment.