diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 04039b885f3c5..5d98bcd4927f6 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -835,9 +835,51 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
   return true;
 }
 
+/// Version FI.OuterLoop and guard the two versions with a runtime overflow
+/// check on the combined trip count.
+///
+/// LoopVersioning is run with an empty list of runtime pointer checks, so it
+/// emits a conditional branch on 'false' in the former preheader. That
+/// condition is replaced here with the overflow bit of
+/// umul_with_overflow(OuterTripCount, InnerTripCount), and the product is
+/// stored in FI.NewTripCount.
+///
+/// \returns true on success; false if the check block does not end in the
+/// expected conditional branch on constant false. versionLoop() has already
+/// run by then, so a false return does not mean the IR is untouched.
+static bool VersionLoop(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
+                        ScalarEvolution *SE, const LoopAccessInfo &LAI) {
+
+  // Version the loop. The overflow check isn't a runtime pointer check, so we
+  // pass an empty list of runtime pointer checks, causing LoopVersioning to
+  // emit 'false' as the branch condition, and add our own check afterwards.
+  BasicBlock *CheckBlock = FI.OuterLoop->getLoopPreheader();
+  ArrayRef<RuntimePointerCheck> Checks(nullptr, nullptr);
+  LoopVersioning LVer(LAI, Checks, FI.OuterLoop, LI, DT, SE);
+  LVer.versionLoop();
+
+  // Check for overflow by calculating the new tripcount using
+  // umul_with_overflow and then checking if it overflowed.
+  BranchInst *Br = dyn_cast<BranchInst>(CheckBlock->getTerminator());
+  if (!Br || !Br->isConditional())
+    return false;
+  if (!match(Br->getCondition(), m_Zero()))
+    return false;
+  IRBuilder<> Builder(Br);
+  Value *Call = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow,
+                                        FI.OuterTripCount->getType(),
+                                        {FI.OuterTripCount, FI.InnerTripCount},
+                                        /*FMFSource=*/nullptr, "flatten.mul");
+  FI.NewTripCount = Builder.CreateExtractValue(Call, 0, "flatten.tripcount");
+  Value *Overflow = Builder.CreateExtractValue(Call, 1, "flatten.overflow");
+  Br->setCondition(Overflow);
+  return true;
+}
+
 static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                        ScalarEvolution *SE, AssumptionCache *AC,
-                       const TargetTransformInfo *TTI) {
+                       const TargetTransformInfo *TTI,
+                       const LoopAccessInfo &LAI) {
   if (!WidenIV) {
     LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
     return false;
@@ -916,7 +958,7 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
     return false;
 
   // Check if we can widen the induction variables to avoid overflow checks.
-  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI, LAI);
 
   // It can happen that after widening of the IV, flattening may not be
   // possible/happening, e.g. when it is deemed unprofitable. So bail here if
@@ -961,30 +1003,8 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
       return false;
     }
     LLVM_DEBUG(dbgs() << "Multiply might overflow, versioning loop\n");
-
-    // Version the loop. The overflow check isn't a runtime pointer check, so we
-    // pass an empty list of runtime pointer checks, causing LoopVersioning to
-    // emit 'false' as the branch condition, and add our own check afterwards.
- BasicBlock *CheckBlock = FI.OuterLoop->getLoopPreheader(); - ArrayRef Checks(nullptr, nullptr); - LoopVersioning LVer(LAI, Checks, FI.OuterLoop, LI, DT, SE); - LVer.versionLoop(); - - // Check for overflow by calculating the new tripcount using - // umul_with_overflow and then checking if it overflowed. - BranchInst *Br = cast(CheckBlock->getTerminator()); - assert(Br->isConditional() && - "Expected LoopVersioning to generate a conditional branch"); - assert(match(Br->getCondition(), m_Zero()) && - "Expected branch condition to be false"); - IRBuilder<> Builder(Br); - Value *Call = Builder.CreateIntrinsic( - Intrinsic::umul_with_overflow, FI.OuterTripCount->getType(), - {FI.OuterTripCount, FI.InnerTripCount}, - /*FMFSource=*/nullptr, "flatten.mul"); - FI.NewTripCount = Builder.CreateExtractValue(Call, 0, "flatten.tripcount"); - Value *Overflow = Builder.CreateExtractValue(Call, 1, "flatten.overflow"); - Br->setCondition(Overflow); + bool LoopIsVersioned = VersionLoop(FI, DT, LI, SE, LAI); + assert(LoopIsVersioned && "Failed to version loop"); } else { LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n"); } diff --git a/llvm/test/Transforms/LoopFlatten/loop-flatten-version.ll b/llvm/test/Transforms/LoopFlatten/loop-flatten-version.ll index 85072bf3a43f4..0842f049feb11 100644 --- a/llvm/test/Transforms/LoopFlatten/loop-flatten-version.ll +++ b/llvm/test/Transforms/LoopFlatten/loop-flatten-version.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev -o - | FileCheck %s +; RUN: opt %s -S -passes='loop(loop-flatten),verify' -loop-flatten-widen-iv=false -loop-flatten-version-loops=true -verify-loop-info -verify-dom-info -verify-scev -o - | FileCheck %s --check-prefix=CHECK-VERSION-OVER-WIDEN target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -61,6 +62,62 @@ define 
void @noinbounds_gep(i32 %N, ptr %A) { ; CHECK: for.end: ; CHECK-NEXT: ret void ; +; CHECK-VERSION-OVER-WIDEN-LABEL: define void @noinbounds_gep( +; CHECK-VERSION-OVER-WIDEN-SAME: i32 [[N:%.*]], ptr [[A:%.*]]) { +; CHECK-VERSION-OVER-WIDEN-NEXT: entry: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP3:%.*]] = icmp ult i32 0, [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP3]], label [[FOR_INNER_PREHEADER_LVER_CHECK:%.*]], label [[FOR_END:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader.lver.check: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[N]], i32 [[N]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[FLATTEN_OVERFLOW]], label [[FOR_INNER_PREHEADER_PH_LVER_ORIG:%.*]], label [[FOR_INNER_PREHEADER_PH:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader.ph.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER_PREHEADER_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_INNER_PREHEADER_PH_LVER_ORIG]] ], [ [[INC2_LVER_ORIG:%.*]], [[FOR_OUTER_LVER_ORIG:%.*]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_INNER_PREHEADER_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_INNER_LVER_ORIG]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[I_LVER_ORIG]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[GEP_LVER_ORIG:%.*]] = getelementptr i32, ptr [[A]], i32 [[MUL_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr i32, ptr [[GEP_LVER_ORIG]], i32 [[J_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: store 
i32 0, ptr [[ARRAYIDX_LVER_ORIG]], align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC1_LVER_ORIG]] = add nuw i32 [[J_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[INC1_LVER_ORIG]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP2_LVER_ORIG]], label [[FOR_INNER_LVER_ORIG]], label [[FOR_OUTER_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN: for.outer.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC2_LVER_ORIG]] = add i32 [[I_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP1_LVER_ORIG:%.*]] = icmp ult i32 [[INC2_LVER_ORIG]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP1_LVER_ORIG]], label [[FOR_INNER_PREHEADER_LVER_ORIG]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader.ph: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER_PREHEADER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I:%.*]] = phi i32 [ 0, [[FOR_INNER_PREHEADER_PH]] ], [ [[INC2:%.*]], [[FOR_OUTER:%.*]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_ARRAYIDX:%.*]] = getelementptr i32, ptr [[A]], i32 [[I]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_INNER_PREHEADER]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL:%.*]] = mul i32 [[I]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i32 [[MUL]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[GEP]], i32 [[J]] +; CHECK-VERSION-OVER-WIDEN-NEXT: store i32 0, ptr [[FLATTEN_ARRAYIDX]], align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC1:%.*]] = add nuw i32 [[J]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP2:%.*]] = icmp ult i32 [[INC1]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_OUTER]] +; CHECK-VERSION-OVER-WIDEN: for.outer: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC2]] = add i32 [[I]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP1:%.*]] = 
icmp ult i32 [[INC2]], [[FLATTEN_TRIPCOUNT]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP1]], label [[FOR_INNER_PREHEADER]], label [[FOR_END_LOOPEXIT_LOOPEXIT1:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.end.loopexit.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.end.loopexit.loopexit1: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-VERSION-OVER-WIDEN: for.end.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_END]] +; CHECK-VERSION-OVER-WIDEN: for.end: +; CHECK-VERSION-OVER-WIDEN-NEXT: ret void +; entry: %cmp3 = icmp ult i32 0, %N br i1 %cmp3, label %for.outer.preheader, label %for.end @@ -124,6 +181,34 @@ define void @noinbounds_gep_too_large_mul(i64 %N, ptr %A) { ; CHECK: for.end: ; CHECK-NEXT: ret void ; +; CHECK-VERSION-OVER-WIDEN-LABEL: define void @noinbounds_gep_too_large_mul( +; CHECK-VERSION-OVER-WIDEN-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) { +; CHECK-VERSION-OVER-WIDEN-NEXT: entry: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP3:%.*]] = icmp ult i64 0, [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP3]], label [[FOR_INNER_PREHEADER_PH:%.*]], label [[FOR_END:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.outer.preheader: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER_PREHEADER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner.preheader: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I:%.*]] = phi i64 [ 0, [[FOR_INNER_PREHEADER_PH]] ], [ [[INC2:%.*]], [[FOR_OUTER:%.*]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_INNER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.inner: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J:%.*]] = phi i64 [ 0, [[FOR_INNER_PREHEADER]] ], [ [[INC1:%.*]], [[FOR_INNER]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL:%.*]] = mul i64 [[I]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[MUL]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[GEP]], i64 [[J]] +; CHECK-VERSION-OVER-WIDEN-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC1]] = add nuw i64 [[J]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INC1]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP2]], label [[FOR_INNER]], label [[FOR_OUTER]] +; CHECK-VERSION-OVER-WIDEN: for.outer: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC2]] = add i64 [[I]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP1:%.*]] = icmp ult i64 [[INC2]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP1]], label [[FOR_INNER_PREHEADER]], label [[FOR_END_LOOPEXIT_LOOPEXIT1:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.end.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_END]] +; CHECK-VERSION-OVER-WIDEN: for.end: +; CHECK-VERSION-OVER-WIDEN-NEXT: ret void +; entry: %cmp3 = icmp ult i64 0, %N br i1 %cmp3, label %for.outer.preheader, label %for.end @@ -238,6 +323,79 @@ define void @d3_2(ptr %A, i32 %N, i32 %M) { ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; +; CHECK-VERSION-OVER-WIDEN-LABEL: define void @d3_2( +; CHECK-VERSION-OVER-WIDEN-SAME: ptr [[A:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) { +; CHECK-VERSION-OVER-WIDEN-NEXT: entry: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP30:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP30]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.lr.ph: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP625:%.*]] = icmp sgt i32 [[M]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.us: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[K_031_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH]] ], [ [[INC13_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP625]], label [[FOR_COND5_PREHEADER_US_US_LVER_CHECK:%.*]], label [[FOR_COND5_PREHEADER_US43_PREHEADER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us43.preheader: +; 
CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_LOOPEXIT50:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us.us.lver.check: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[N]], i32 [[M]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[FLATTEN_OVERFLOW]], label [[FOR_COND5_PREHEADER_US_US_PH_LVER_ORIG:%.*]], label [[FOR_COND5_PREHEADER_US_US_PH:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us.us.ph.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND5_PREHEADER_US_US_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us.us.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I_028_US_US_LVER_ORIG:%.*]] = phi i32 [ [[INC10_US_US_LVER_ORIG:%.*]], [[FOR_COND5_FOR_COND_CLEANUP7_CRIT_EDGE_US_US_LVER_ORIG:%.*]] ], [ 0, [[FOR_COND5_PREHEADER_US_US_PH_LVER_ORIG]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL_US_US_LVER_ORIG:%.*]] = mul nsw i32 [[I_028_US_US_LVER_ORIG]], [[M]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_BODY8_US_US_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.body8.us.us.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J_026_US_US_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_COND5_PREHEADER_US_US_LVER_ORIG]] ], [ [[INC_US_US_LVER_ORIG:%.*]], [[FOR_BODY8_US_US_LVER_ORIG]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ADD_US_US_LVER_ORIG:%.*]] = add nsw i32 [[J_026_US_US_LVER_ORIG]], [[MUL_US_US_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[IDXPROM_US_US_LVER_ORIG:%.*]] = sext i32 [[ADD_US_US_LVER_ORIG]] to i64 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX_US_US_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_US_US_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void @f(ptr [[ARRAYIDX_US_US_LVER_ORIG]]) +; 
CHECK-VERSION-OVER-WIDEN-NEXT: [[INC_US_US_LVER_ORIG]] = add nuw nsw i32 [[J_026_US_US_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp ne i32 [[INC_US_US_LVER_ORIG]], [[M]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_BODY8_US_US_LVER_ORIG]], label [[FOR_COND5_FOR_COND_CLEANUP7_CRIT_EDGE_US_US_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.for.cond.cleanup7_crit_edge.us.us.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC10_US_US_LVER_ORIG]] = add nuw nsw i32 [[I_028_US_US_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[EXITCOND51_LVER_ORIG:%.*]] = icmp ne i32 [[INC10_US_US_LVER_ORIG]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[EXITCOND51_LVER_ORIG]], label [[FOR_COND5_PREHEADER_US_US_LVER_ORIG]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us.us.ph: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND5_PREHEADER_US_US:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.for.cond.cleanup3_crit_edge.us.loopexit.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.for.cond.cleanup3_crit_edge.us.loopexit.loopexit1: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_LOOPEXIT]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.for.cond.cleanup3_crit_edge.us.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.for.cond.cleanup3_crit_edge.us.loopexit50: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC13_US]] = add nuw nsw i32 [[K_031_US]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[EXITCOND52:%.*]] = icmp ne i32 [[INC13_US]], [[N]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 
[[EXITCOND52]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.preheader.us.us: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I_028_US_US:%.*]] = phi i32 [ [[INC10_US_US:%.*]], [[FOR_COND5_FOR_COND_CLEANUP7_CRIT_EDGE_US_US:%.*]] ], [ 0, [[FOR_COND5_PREHEADER_US_US_PH]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL_US_US:%.*]] = mul nsw i32 [[I_028_US_US]], [[M]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_BODY8_US_US:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond5.for.cond.cleanup7_crit_edge.us.us: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC10_US_US]] = add nuw nsw i32 [[I_028_US_US]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[EXITCOND51:%.*]] = icmp ne i32 [[INC10_US_US]], [[FLATTEN_TRIPCOUNT]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[EXITCOND51]], label [[FOR_COND5_PREHEADER_US_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_LOOPEXIT_LOOPEXIT1:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.body8.us.us: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J_026_US_US:%.*]] = phi i32 [ 0, [[FOR_COND5_PREHEADER_US_US]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ADD_US_US:%.*]] = add nsw i32 [[J_026_US_US]], [[MUL_US_US]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[IDXPROM_US_US:%.*]] = sext i32 [[I_028_US_US]] to i64 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_US_US]] +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void @f(ptr [[ARRAYIDX_US_US]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC_US_US:%.*]] = add nuw nsw i32 [[J_026_US_US]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC_US_US]], [[M]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND5_FOR_COND_CLEANUP7_CRIT_EDGE_US_US]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup: +; CHECK-VERSION-OVER-WIDEN-NEXT: ret void +; entry: %cmp30 = icmp sgt i32 %N, 0 br i1 %cmp30, label 
%for.cond1.preheader.lr.ph, label %for.cond.cleanup @@ -371,6 +529,79 @@ define void @overflow(i32 %lim, ptr %a) { ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[J_016]], 99999 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP3]] ; +; CHECK-VERSION-OVER-WIDEN-LABEL: define void @overflow( +; CHECK-VERSION-OVER-WIDEN-SAME: i32 [[LIM:%.*]], ptr [[A:%.*]]) { +; CHECK-VERSION-OVER-WIDEN-NEXT: entry: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP17_NOT:%.*]] = icmp eq i32 [[LIM]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP17_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LVER_CHECK:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.lver.check: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[LIM]], i32 100000) +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[FLATTEN_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[FLATTEN_MUL]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[FLATTEN_OVERFLOW]], label [[FOR_COND1_PREHEADER_PH_LVER_ORIG:%.*]], label [[FOR_COND1_PREHEADER_PH:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.ph.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_PREHEADER_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I_018_LVER_ORIG:%.*]] = phi i32 [ [[INC6_LVER_ORIG:%.*]], [[FOR_COND_CLEANUP3_LVER_ORIG:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PH_LVER_ORIG]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[I_018_LVER_ORIG]], 100000 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_BODY4_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.body4.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J_016_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[IF_END_LVER_ORIG:%.*]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ADD_LVER_ORIG:%.*]] = add i32 [[J_016_LVER_ORIG]], 
[[MUL_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TMP0:%.*]] = load i32, ptr @first, align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TOBOOL_NOT_LVER_ORIG:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[TOBOOL_NOT_LVER_ORIG]], label [[IF_END_LVER_ORIG]], label [[IF_THEN_LVER_ORIG:%.*]] +; CHECK-VERSION-OVER-WIDEN: if.then.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds [0 x i8], ptr @a, i32 0, i32 [[ADD_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_LVER_ORIG]], align 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void asm sideeffect "", "r"(i8 [[TMP1]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: store i32 0, ptr @first, align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[IF_END_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN: if.end.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void asm sideeffect "", "r"(i32 [[ADD_LVER_ORIG]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i32 [[J_016_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[J_016_LVER_ORIG]], 99999 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP2_LVER_ORIG]], label [[FOR_BODY4_LVER_ORIG]], label [[FOR_COND_CLEANUP3_LVER_ORIG]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup3.lver.orig: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC6_LVER_ORIG]] = add i32 [[I_018_LVER_ORIG]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP_LVER_ORIG:%.*]] = icmp ult i32 [[INC6_LVER_ORIG]], [[LIM]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP_LVER_ORIG]], label [[FOR_COND1_PREHEADER_LVER_ORIG]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader.ph: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond1.preheader: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[I_018:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PH]] ] +; 
CHECK-VERSION-OVER-WIDEN-NEXT: [[MUL:%.*]] = mul i32 [[I_018]], 100000 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup.loopexit.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup.loopexit.loopexit1: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup.loopexit: +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup: +; CHECK-VERSION-OVER-WIDEN-NEXT: ret void +; CHECK-VERSION-OVER-WIDEN: for.cond.cleanup3: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[INC6]] = add i32 [[I_018]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC6]], [[FLATTEN_TRIPCOUNT]] +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT1:%.*]] +; CHECK-VERSION-OVER-WIDEN: for.body4: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[J_016:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER]] ] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ADD:%.*]] = add i32 [[J_016]], [[MUL]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TMP2:%.*]] = load i32, ptr @first, align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-VERSION-OVER-WIDEN-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK-VERSION-OVER-WIDEN: if.then: +; CHECK-VERSION-OVER-WIDEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr @a, i32 0, i32 [[I_018]] +; CHECK-VERSION-OVER-WIDEN-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void asm sideeffect "", "r"(i8 [[TMP3]]) +; CHECK-VERSION-OVER-WIDEN-NEXT: store i32 0, ptr @first, align 4 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[IF_END]] +; CHECK-VERSION-OVER-WIDEN: if.end: +; CHECK-VERSION-OVER-WIDEN-NEXT: tail call void asm sideeffect "", "r"(i32 [[I_018]]) +; 
CHECK-VERSION-OVER-WIDEN-NEXT: [[INC:%.*]] = add nuw nsw i32 [[J_016]], 1 +; CHECK-VERSION-OVER-WIDEN-NEXT: [[CMP2:%.*]] = icmp ult i32 [[J_016]], 99999 +; CHECK-VERSION-OVER-WIDEN-NEXT: br label [[FOR_COND_CLEANUP3]] +; entry: %cmp17.not = icmp eq i32 %lim, 0 br i1 %cmp17.not, label %for.cond.cleanup, label %for.cond1.preheader.preheader