diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 90bae77bcf703..6fb28072afe46 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -59,6 +59,11 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info",
                     "Lazy Value Information Analysis", false, true)
 
+static cl::opt<bool> PerPredRanges(
+    "lvi-per-pred-ranges", cl::Hidden, cl::init(false),
+    cl::desc("Enable tracking of ranges for a value in a block for "
+             "each block predecessor (default = false)"));
+
 namespace llvm {
 FunctionPass *createLazyValueInfoPass() {
   return new LazyValueInfoWrapperPass();
@@ -103,6 +108,10 @@ namespace {
 namespace {
 using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;
+using BBLatticeElementMap =
+    SmallDenseMap<AssertingVH<BasicBlock>, ValueLatticeElement, 4>;
+using PredecessorValueLatticeMap =
+    SmallDenseMap<AssertingVH<Value>, BBLatticeElementMap, 2>;
 
 /// This is the cache kept by LazyValueInfo which
 /// maintains information about queries across the clients' queries.
@@ -117,6 +126,10 @@ class LazyValueInfoCache {
     // std::nullopt indicates that the nonnull pointers for this basic block
     // have not been computed yet.
     std::optional<NonNullPointerSet> NonNullPointers;
+    // This is an extension of the above LatticeElements, caching, for each
+    // Value, a ValueLatticeElement for each predecessor of the BB tracked by
+    // this entry.
+    std::optional<PredecessorValueLatticeMap> PredecessorLatticeElements;
   };
 
   /// Cached information per basic block.
@@ -134,8 +147,14 @@ class LazyValueInfoCache {
   BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) {
     auto It = BlockCache.find_as(BB);
-    if (It == BlockCache.end())
-      It = BlockCache.insert({BB, std::make_unique<BlockCacheEntry>()}).first;
+    if (It == BlockCache.end()) {
+      std::unique_ptr<BlockCacheEntry> BCE =
+          std::make_unique<BlockCacheEntry>();
+      if (PerPredRanges)
+        BCE->PredecessorLatticeElements =
+            std::make_optional<PredecessorValueLatticeMap>();
+      It = BlockCache.insert({BB, std::move(BCE)}).first;
+    }
 
     return It->second.get();
   }
@@ -161,6 +180,28 @@ class LazyValueInfoCache {
     addValueHandle(Val);
   }
 
+  void insertPredecessorResults(Value *Val, BasicBlock *BB,
+                                BBLatticeElementMap &PredLatticeElements) {
+    BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
+
+    Entry->PredecessorLatticeElements->insert({Val, PredLatticeElements});
+
+    addValueHandle(Val);
+  }
+
+  std::optional<BBLatticeElementMap>
+  getCachedPredecessorInfo(Value *V, BasicBlock *BB) const {
+    const BlockCacheEntry *Entry = getBlockEntry(BB);
+    if (!Entry)
+      return std::nullopt;
+
+    auto LatticeIt = Entry->PredecessorLatticeElements->find_as(V);
+    if (LatticeIt == Entry->PredecessorLatticeElements->end())
+      return std::nullopt;
+
+    return LatticeIt->second;
+  }
+
   std::optional<ValueLatticeElement> getCachedValueInfo(Value *V,
                                                         BasicBlock *BB) const {
     const BlockCacheEntry *Entry = getBlockEntry(BB);
@@ -216,6 +257,8 @@ void LazyValueInfoCache::eraseValue(Value *V) {
     Pair.second->OverDefined.erase(V);
     if (Pair.second->NonNullPointers)
       Pair.second->NonNullPointers->erase(V);
+    if (PerPredRanges)
+      Pair.second->PredecessorLatticeElements->erase(V);
   }
 
   auto HandleIt = ValueHandles.find_as(V);
@@ -230,6 +273,10 @@ void LVIValueHandle::deleted() {
 }
 
 void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+  // Clear all per-predecessor ranges when a BB is removed.
+  if (PerPredRanges)
+    for (auto &Pair : BlockCache)
+      Pair.second->PredecessorLatticeElements->clear();
   BlockCache.erase(BB);
 }
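For orientation before the solver changes below: the data structure added here is a second cache level keyed first by `Value`, then by predecessor block. A minimal standalone sketch of that shape, and of the lookup discipline `getCachedPredecessorInfo` follows, using plain C++ containers and hypothetical stand-in types rather than the LLVM classes from the patch:

```cpp
#include <optional>
#include <unordered_map>

struct Value {};        // stand-in for llvm::Value
struct BasicBlock {};   // stand-in for llvm::BasicBlock
struct LatticeElem {};  // stand-in for llvm::ValueLatticeElement

// Predecessor block -> range of the value along that incoming edge.
using BBLatticeElemMap = std::unordered_map<const BasicBlock *, LatticeElem>;
// Value -> its per-predecessor ranges within one cached block.
using PredValueLatticeMap = std::unordered_map<const Value *, BBLatticeElemMap>;

struct BlockCacheEntry {
  // Engaged only when the lvi-per-pred-ranges flag is on, mirroring the
  // std::optional member the patch adds to the per-block cache entry.
  std::optional<PredValueLatticeMap> PredLatticeElems;
};

// Analogue of getCachedPredecessorInfo: a miss at any level means the caller
// simply falls back to the ordinary merged result.
std::optional<BBLatticeElemMap> lookup(const BlockCacheEntry &E,
                                       const Value *V) {
  if (!E.PredLatticeElems)
    return std::nullopt;
  auto It = E.PredLatticeElems->find(V);
  if (It == E.PredLatticeElems->end())
    return std::nullopt;
  return It->second;
}
```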
@@ -691,6 +738,9 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
   // find a path to function entry.  TODO: We should consider explicitly
   // canonicalizing to make this true rather than relying on this happy
   // accident.
+  std::optional<BBLatticeElementMap> PredLatticeElements;
+  if (PerPredRanges)
+    PredLatticeElements = std::make_optional<BBLatticeElementMap>();
   for (BasicBlock *Pred : predecessors(BB)) {
     // Skip self loops.
     if (Pred == BB)
@@ -710,8 +760,13 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
                         << Pred->getName() << "' (non local).\n");
       return Result;
     }
+    if (PerPredRanges)
+      PredLatticeElements->insert({Pred, *EdgeResult});
   }
 
+  if (PerPredRanges)
+    TheCache.insertPredecessorResults(Val, BB, *PredLatticeElements);
+
   // Return the merged value, which is more precise than 'overdefined'.
   assert(!Result.isOverdefined());
   return Result;
@@ -724,6 +779,9 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
   // Loop over all of our predecessors, merging what we know from them into
   // result.  See the comment about the chosen traversal order in
   // solveBlockValueNonLocal; the same reasoning applies here.
+  std::optional<BBLatticeElementMap> PredLatticeElements;
+  if (PerPredRanges)
+    PredLatticeElements = std::make_optional<BBLatticeElementMap>();
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     BasicBlock *PhiBB = PN->getIncomingBlock(i);
     Value *PhiVal = PN->getIncomingValue(i);
@@ -746,8 +804,14 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
       return Result;
     }
+
+    if (PerPredRanges)
+      PredLatticeElements->insert({PhiBB, *EdgeResult});
   }
 
+  if (PerPredRanges)
+    TheCache.insertPredecessorResults(PN, BB, *PredLatticeElements);
+
   // Return the merged value, which is more precise than 'overdefined'.
   assert(!Result.isOverdefined() && "Possible PHI in entry block?");
   return Result;
@@ -1002,7 +1066,77 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
 
   const ConstantRange &LHSRange = *LHSRes;
   const ConstantRange &RHSRange = *RHSRes;
-  return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+
+  std::optional<ValueLatticeElement> MergedResult =
+      ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+
+  if (!PerPredRanges)
+    return MergedResult;
+
+  std::optional<BBLatticeElementMap> PredLHS =
+      TheCache.getCachedPredecessorInfo(LHS, BB);
+  if (!PredLHS)
+    return MergedResult;
+  std::optional<BBLatticeElementMap> PredRHS =
+      TheCache.getCachedPredecessorInfo(RHS, BB);
+  if (!PredRHS)
+    return MergedResult;
+
+  const BBLatticeElementMap &LHSPredMap = *PredLHS;
+  const BBLatticeElementMap &RHSPredMap = *PredRHS;
+
+  BBLatticeElementMap PredLatticeElements;
+  ValueLatticeElement OverallPredResult;
+  for (auto *Pred : predecessors(BB)) {
+    auto LHSIt = LHSPredMap.find_as(Pred);
+    if (LHSIt == LHSPredMap.end())
+      return MergedResult;
+    const ValueLatticeElement &LHSFromPred = LHSIt->second;
+    std::optional<ConstantRange> LHSFromPredRes =
+        LHSFromPred.asConstantRange(LHS->getType());
+    if (!LHSFromPredRes)
+      return MergedResult;
+
+    auto RHSIt = RHSPredMap.find_as(Pred);
+    if (RHSIt == RHSPredMap.end())
+      return MergedResult;
+    const ValueLatticeElement &RHSFromPred = RHSIt->second;
+    std::optional<ConstantRange> RHSFromPredRes =
+        RHSFromPred.asConstantRange(RHS->getType());
+    if (!RHSFromPredRes)
+      return MergedResult;
+
+    const ConstantRange &LHSFromPredRange = *LHSFromPredRes;
+    const ConstantRange &RHSFromPredRange = *RHSFromPredRes;
+    std::optional<ValueLatticeElement> PredResult =
+        ValueLatticeElement::getRange(OpFn(LHSFromPredRange, RHSFromPredRange));
+    if (!PredResult)
+      return MergedResult;
+    if (PredResult->isOverdefined()) {
+      LLVM_DEBUG(
+          dbgs() << " pred BB '" << Pred->getName() << "' for BB '"
+                 << BB->getName()
+                 << "' overdefined. Discarding all predecessor intervals.\n");
+      return MergedResult;
+    }
+    PredLatticeElements.insert({Pred, *PredResult});
+    OverallPredResult.mergeIn(*PredResult);
+  }
+
+  // If this point is reached, all predecessors of both LHS and RHS have
+  // previously computed constant ranges, so the result can be cached and
+  // OverallPredResult can be used.
+  TheCache.insertPredecessorResults(I, BB, PredLatticeElements);
+
+  LLVM_DEBUG(dbgs() << " Using predecessor intervals, evaluated " << *I
+                    << " to: " << OverallPredResult << ".\n");
+
+  if (!MergedResult)
+    return OverallPredResult;
+
+  LLVM_DEBUG(dbgs() << " Intersecting intervals for " << *I << ": "
                    << OverallPredResult << " and " << *MergedResult << ".\n");
+  return MergedResult->intersect(OverallPredResult);
 }
 
 std::optional<ValueLatticeElement>
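The new binary-operator path above reduces to: for each predecessor, apply the operator to the per-predecessor ranges of both operands, union the per-predecessor results, and fall back to the plain merged evaluation the moment any ingredient is missing or overdefined. A compressed sketch of that loop, using a toy closed interval in place of llvm::ConstantRange and a hypothetical `perPredRange` helper (wrapping and overdefined states deliberately ignored):

```cpp
#include <algorithm>
#include <cstdint>
#include <functional>
#include <map>
#include <optional>
#include <vector>

struct Pred {};                        // stand-in for a predecessor block
struct Interval { uint64_t Lo, Hi; };  // toy closed interval [Lo, Hi]

using PredMap = std::map<const Pred *, Interval>;

// Union of OpFn applied predecessor-by-predecessor, or nullopt if either
// operand lacks a cached range for some predecessor (the patch then just
// returns MergedResult instead).
std::optional<Interval>
perPredRange(const std::vector<const Pred *> &Preds, const PredMap &LHS,
             const PredMap &RHS,
             const std::function<Interval(Interval, Interval)> &OpFn) {
  std::optional<Interval> Overall;
  for (const Pred *P : Preds) {
    auto L = LHS.find(P);
    auto R = RHS.find(P);
    if (L == LHS.end() || R == RHS.end())
      return std::nullopt;             // bail out to the merged result
    Interval Res = OpFn(L->second, R->second);
    if (!Overall)
      Overall = Res;
    else                               // counterpart of mergeIn: union
      Overall = Interval{std::min(Overall->Lo, Res.Lo),
                         std::max(Overall->Hi, Res.Hi)};
  }
  return Overall;                      // counterpart of OverallPredResult
}
```

The real code additionally intersects this union with `MergedResult`, so the per-predecessor information can only ever tighten the final range, never widen it.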
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/track-predecessor-ranges.ll b/llvm/test/Transforms/CorrelatedValuePropagation/track-predecessor-ranges.ll
new file mode 100644
index 0000000000000..b5f688420d9c9
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/track-predecessor-ranges.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes="correlated-propagation" -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes="correlated-propagation" -lvi-per-pred-ranges -S 2>&1 | FileCheck %s -check-prefix=LVI-PRED-RANGES
+
+@global = external local_unnamed_addr global [4338 x i32], align 16
+
+define dso_local noundef zeroext i1 @bar(i64 noundef %arg, ptr noundef writeonly captures(none) %arg1) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef zeroext i1 @bar(
+; CHECK-SAME: i64 noundef [[ARG:%.*]], ptr noundef writeonly captures(none) [[ARG1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[ARG]], 1025
+; CHECK-NEXT: br i1 [[ICMP]], label %[[BB4:.*]], label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[ARG]], 262145
+; CHECK-NEXT: br i1 [[ICMP3]], label %[[BB4]], label %[[BB9:.*]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 7, %[[BB]] ], [ 15487, %[[BB2]] ]
+; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 3, %[[BB]] ], [ 7, %[[BB2]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI]], [[ARG]]
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[ADD]], [[PHI5]]
+; CHECK-NEXT: [[ICMP6:%.*]] = icmp samesign ult i64 [[LSHR]], 4338
+; CHECK-NEXT: br i1 [[ICMP6]], label %[[BB8:.*]], label %[[BB7:.*]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 18)
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 [[LSHR]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
+; CHECK-NEXT: store i64 [[SEXT]], ptr [[ARG1]], align 8
+; CHECK-NEXT: br label %[[BB9]]
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[PHI10:%.*]] = phi i1 [ true, %[[BB8]] ], [ false, %[[BB2]] ]
+; CHECK-NEXT: ret i1 [[PHI10]]
+;
+; LVI-PRED-RANGES-LABEL: define dso_local noundef zeroext i1 @bar(
+; LVI-PRED-RANGES-SAME: i64 noundef [[ARG:%.*]], ptr noundef writeonly captures(none) [[ARG1:%.*]]) local_unnamed_addr {
+; LVI-PRED-RANGES-NEXT: [[BB:.*]]:
+; LVI-PRED-RANGES-NEXT: [[ICMP:%.*]] = icmp ult i64 [[ARG]], 1025
+; LVI-PRED-RANGES-NEXT: br i1 [[ICMP]], label %[[BB4:.*]], label %[[BB2:.*]]
+; LVI-PRED-RANGES: [[BB2]]:
+; LVI-PRED-RANGES-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[ARG]], 262145
+; LVI-PRED-RANGES-NEXT: br i1 [[ICMP3]], label %[[BB4]], label %[[BB9:.*]]
+; LVI-PRED-RANGES: [[BB4]]:
+; LVI-PRED-RANGES-NEXT: [[PHI:%.*]] = phi i64 [ 7, %[[BB]] ], [ 15487, %[[BB2]] ]
+; LVI-PRED-RANGES-NEXT: [[PHI5:%.*]] = phi i64 [ 3, %[[BB]] ], [ 7, %[[BB2]] ]
+; LVI-PRED-RANGES-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI]], [[ARG]]
+; LVI-PRED-RANGES-NEXT: [[LSHR:%.*]] = lshr i64 [[ADD]], [[PHI5]]
+; LVI-PRED-RANGES-NEXT: br i1 true, label %[[BB8:.*]], label %[[BB7:.*]]
+; LVI-PRED-RANGES: [[BB7]]:
+; LVI-PRED-RANGES-NEXT: tail call void @llvm.ubsantrap(i8 18)
+; LVI-PRED-RANGES-NEXT: unreachable
+; LVI-PRED-RANGES: [[BB8]]:
+; LVI-PRED-RANGES-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 [[LSHR]]
+; LVI-PRED-RANGES-NEXT: [[LOAD:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
+; LVI-PRED-RANGES-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
+; LVI-PRED-RANGES-NEXT: store i64 [[SEXT]], ptr [[ARG1]], align 8
+; LVI-PRED-RANGES-NEXT: br label %[[BB9]]
+; LVI-PRED-RANGES: [[BB9]]:
+; LVI-PRED-RANGES-NEXT: [[PHI10:%.*]] = phi i1 [ true, %[[BB8]] ], [ false, %[[BB2]] ]
+; LVI-PRED-RANGES-NEXT: ret i1 [[PHI10]]
+;
+bb:
+  %icmp = icmp ult i64 %arg, 1025
+  br i1 %icmp, label %bb4, label %bb2
+
+bb2:                                              ; preds = %bb
+  %icmp3 = icmp ult i64 %arg, 262145
+  br i1 %icmp3, label %bb4, label %bb9
+
+bb4:                                              ; preds = %bb2, %bb
+  %phi = phi i64 [ 7, %bb ], [ 15487, %bb2 ]
+  %phi5 = phi i64 [ 3, %bb ], [ 7, %bb2 ]
+  %add = add nuw nsw i64 %phi, %arg
+  %lshr = lshr i64 %add, %phi5
+  %icmp6 = icmp samesign ult i64 %lshr, 4338
+  br i1 %icmp6, label %bb8, label %bb7
+
+bb7:                                              ; preds = %bb4
+  tail call void @llvm.ubsantrap(i8 18)
+  unreachable
+
+bb8:                                              ; preds = %bb4
+  %getelementptr = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 %lshr
+  %load = load i32, ptr %getelementptr, align 4
+  %sext = sext i32 %load to i64
+  store i64 %sext, ptr %arg1, align 8
+  br label %bb9
+
+bb9:                                              ; preds = %bb8, %bb2
+  %phi10 = phi i1 [ true, %bb8 ], [ false, %bb2 ]
+  ret i1 %phi10
+}
+
+; Function Attrs: cold noreturn nounwind
+declare void @llvm.ubsantrap(i8 immarg) #0
+
+attributes #0 = { cold noreturn nounwind }
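To see why the second RUN line folds `%icmp6` into `br i1 true` while the first cannot, plug the test's constants into both evaluation strategies. The quick standalone check below mirrors the interval endpoints from the IR; it is only illustrative arithmetic, not LVI's actual ConstantRange computation:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  // Along %bb:  %phi = 7, %phi5 = 3, and %arg in [0, 1024].
  uint64_t HiA = (7 + 1024ULL) >> 3;            // %lshr <= 128
  // Along %bb2: %phi = 15487, %phi5 = 7, and %arg in [1025, 262144].
  uint64_t HiB = (15487 + 262144ULL) >> 7;      // %lshr <= 2168
  // Per-predecessor union: %lshr in [0, 2168], and 2168 ult 4338 holds,
  // so the bounds check folds away.
  std::cout << "per-pred bound: " << std::max(HiA, HiB) << "\n";
  // Merged-first evaluation: %add in [7, 277631] and %phi5 in [3, 7], so
  // the best provable bound is 277631 >> 3 = 34703, which is not < 4338.
  std::cout << "merged bound:   " << ((15487 + 262144ULL) >> 3) << "\n";
  return 0;
}
```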