diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index f4a4802bcb649..1e32ae44a62a7 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -406,6 +406,10 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + // Run IndVarSimplifyPass before ReassociatePass, which may clear nsw/nuw + // flags and thereby hinder optimizations in IndVarSimplifyPass. + FPM.addPass(createFunctionToLoopPassAdaptor(IndVarSimplifyPass())); + // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) // minimal multiplication trees. @@ -580,6 +584,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + // Run IndVarSimplifyPass before ReassociatePass, which may clear nsw/nuw + // flags and thereby hinder optimizations in IndVarSimplifyPass. + FPM.addPass(createFunctionToLoopPassAdaptor(IndVarSimplifyPass())); + // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) // minimal multiplication trees. 
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index ecdb5a5e010d9..14e6eceb3a0f4 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -160,15 +160,16 @@ ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O1-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 064362eabbf83..064c64354cc35 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -98,15 +98,17 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: ReassociatePass -; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: 
Running pass: LoopSimplifyPass -; CHECK-O1-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass +; CHECK-O-NEXT: Running pass: ReassociatePass +; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass +; CHECK-O1-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 19a44867e434a..42883c52e0d1f 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -86,13 +86,15 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: 
Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index ac80a31d8fd4b..20bd323ec3bfc 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -94,13 +94,15 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index 6486639e07b49..4ae31bf6900a2 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -129,15 +129,16 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: 
Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O1-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 09f9f0f48badd..326d132d4f44f 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -132,13 +132,15 @@ ; CHECK-O3-NEXT: Running pass: PGOMemOPSizeOpt ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll 
b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 47bdbfd2d357d..379fa8d18106c 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -98,13 +98,15 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass -; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LICM diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll index eb813bdb8c4ee..54b3849cb5125 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll @@ -10,34 +10,36 @@ define i1 @test_order_1(ptr %this, ptr noalias %other, i1 %tobool9.not, i32 %cal ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[TOBOOL9_NOT]], label [[EXIT:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; CHECK: for.cond.preheader: -; CHECK-NEXT: [[CMP40_NOT3:%.*]] = icmp slt i32 [[CALL]], 1 -; CHECK-NEXT: br i1 [[CMP40_NOT3]], label [[FOR_COND41_PREHEADER_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: 
for.cond41.preheader.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[CALL]] to i64 -; CHECK-NEXT: br label [[FOR_COND41_PREHEADER:%.*]] -; CHECK: for.cond: -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP1]], 1 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND41_PREHEADER]] +; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[CALL]], i32 1) +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[SMAX]] to i64 +; CHECK-NEXT: [[EXITCOND7_NOT:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND41_PREHEADER:%.*]] ; CHECK: for.cond41.preheader: -; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ [[TMP0]], [[FOR_COND41_PREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND:%.*]] ] -; CHECK-NEXT: [[CALL431:%.*]] = load volatile i32, ptr [[OTHER]], align 4 -; CHECK-NEXT: [[CMP442:%.*]] = icmp sgt i32 [[CALL431]], 0 -; CHECK-NEXT: br i1 [[CMP442]], label [[FOR_BODY45_LR_PH:%.*]], label [[FOR_COND]] +; CHECK-NEXT: [[INDVARS_IV8:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC57:%.*]] ], [ [[TMP0]], [[FOR_COND_PREHEADER]] ] +; CHECK-NEXT: [[CALL433:%.*]] = load volatile i32, ptr [[OTHER]], align 4 +; CHECK-NEXT: [[CMP444:%.*]] = icmp sgt i32 [[CALL433]], 0 +; CHECK-NEXT: br i1 [[CMP444]], label [[FOR_BODY45_LR_PH:%.*]], label [[FOR_INC57]] ; CHECK: for.body45.lr.ph: -; CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr ptr, ptr [[OTHER]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr ptr, ptr [[OTHER]], i64 [[INDVARS_IV8]] ; CHECK-NEXT: br label [[FOR_BODY45:%.*]] ; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[INDVARS_IV_LCSSA6:%.*]] = phi i64 [ [[TMP0]], [[FOR_COND_PREHEADER]] ], [ [[WIDE_TRIP_COUNT]], [[FOR_INC57]] ] +; CHECK-NEXT: [[CMP40_NOT_LE5:%.*]] = icmp slt i64 [[INDVARS_IV_LCSSA6]], 
1 ; CHECK-NEXT: store i32 0, ptr [[THIS]], align 4 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: for.body45: ; CHECK-NEXT: [[CALL49:%.*]] = load volatile i1, ptr [[ARRAYIDX_I_I]], align 1 ; CHECK-NEXT: [[CALL43:%.*]] = load volatile i32, ptr [[OTHER]], align 4 ; CHECK-NEXT: [[CMP44:%.*]] = icmp sgt i32 [[CALL43]], 0 -; CHECK-NEXT: br i1 [[CMP44]], label [[FOR_BODY45]], label [[FOR_COND]] +; CHECK-NEXT: br i1 [[CMP44]], label [[FOR_BODY45]], label [[FOR_INC57]] +; CHECK: for.inc57: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV8]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND41_PREHEADER]] ; CHECK: exit: -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP40_NOT_LE5]], [[FOR_COND_CLEANUP]] ] +; CHECK-NEXT: ret i1 [[TMP1]] ; entry: %retval1 = alloca i1, i32 0, align 1 @@ -100,9 +102,9 @@ define void @test2(ptr %this) #0 { ; CHECK-NEXT: [[CALL2_I_I:%.*]] = load i64, ptr inttoptr (i64 8 to ptr), align 8 ; CHECK-NEXT: [[COND_I_I:%.*]] = select i1 [[CALL1_I_I]], i64 [[CALL2_I_I]], i64 0 ; CHECK-NEXT: switch i64 [[COND_I_I]], label [[COMMON_RET:%.*]] [ -; CHECK-NEXT: i64 11, label [[IF_END_I:%.*]] -; CHECK-NEXT: i64 13, label [[TEST2_FN2_EXIT12:%.*]] -; CHECK-NEXT: i64 17, label [[IF_END_I31:%.*]] +; CHECK-NEXT: i64 11, label [[IF_END_I:%.*]] +; CHECK-NEXT: i64 13, label [[TEST2_FN2_EXIT12:%.*]] +; CHECK-NEXT: i64 17, label [[IF_END_I31:%.*]] ; CHECK-NEXT: ] ; CHECK: if.end.i: ; CHECK-NEXT: [[CALL8_I_I:%.*]] = tail call fastcc i32 @test2_fn6() @@ -125,11 +127,11 @@ define void @test2(ptr %this) #0 { ; CHECK-NEXT: store i8 0, ptr [[THIS]], align 4 ; CHECK-NEXT: br label [[COMMON_RET]] ; CHECK: if.end.i31: -; CHECK-NEXT: [[DOTPRE:%.*]] = tail call fastcc i32 @test2_fn6() -; CHECK-NEXT: [[DOTPRE38:%.*]] = trunc i32 [[DOTPRE]] to i8 -; CHECK-NEXT: [[DOTPRE39:%.*]] = tail call i1 
@test2_fn4(i8 [[DOTPRE38]]) -; CHECK-NEXT: [[DOTPRE40:%.*]] = xor i1 [[DOTPRE39]], true -; CHECK-NEXT: tail call void @llvm.assume(i1 [[DOTPRE40]]) +; CHECK-NEXT: [[CALL8_I_I32:%.*]] = tail call fastcc i32 @test2_fn6() +; CHECK-NEXT: [[TRUNC_I_I33:%.*]] = trunc i32 [[CALL8_I_I32]] to i8 +; CHECK-NEXT: [[CALL1_I1_I34:%.*]] = tail call i1 @test2_fn4(i8 [[TRUNC_I_I33]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[CALL1_I1_I34]], true +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll index 77f53ad56e1cc..078d46fac8c30 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll @@ -7,23 +7,33 @@ target triple = "aarch64" define dso_local void @_Z3fooPiii(ptr %A, i32 %N, i32 %M) #0 { ; CHECK-LABEL: @_Z3fooPiii( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[M:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP3]], i1 [[CMP21]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.cond1.preheader.lr.ph.split.us: -; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[M]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 -; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw nsw i64 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M:%.*]] to i64 +; CHECK-NEXT: [[SMAX7:%.*]] = tail call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[WIDE_TRIP_COUNT8:%.*]] = zext i32 [[SMAX7]] to i64 +; CHECK-NEXT: [[EXITCOND912_NOT:%.*]] = icmp slt i32 [[N]], 1 +; CHECK-NEXT: br i1 [[EXITCOND912_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]] +; CHECK: 
for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[M]], i32 0) +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[EXITCOND10_NOT:%.*]] = icmp slt i32 [[M]], 1 +; CHECK-NEXT: br i1 [[EXITCOND10_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[INDVAR6:%.*]] = phi i64 [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ] -; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR6]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[INDVARS_IV313_US:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH]] ] +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[INDVARS_IV313_US]], [[TMP0]] +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] +; CHECK: for.body4.us: +; CHECK-NEXT: [[INDVARS_IV11_US:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INDVARS_IV_NEXT_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV11_US]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: tail call void @_Z1fi(i32 [[TMP2]]) -; CHECK-NEXT: [[INDVAR_NEXT7]] = add nuw nsw i64 [[INDVAR6]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVAR_NEXT7]], [[FLATTEN_TRIPCOUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_US]] = add nuw nsw i64 [[INDVARS_IV11_US]], 1 +; CHECK-NEXT: [[EXITCOND_US_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_US_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]] +; CHECK: 
for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK-NEXT: [[INDVARS_IV_NEXT4_US]] = add nuw nsw i64 [[INDVARS_IV313_US]], 1 +; CHECK-NEXT: [[EXITCOND9_US_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT4_US]], [[WIDE_TRIP_COUNT8]] +; CHECK-NEXT: br i1 [[EXITCOND9_US_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index d048b0bab4176..8cc087c1b9386 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -83,39 +83,39 @@ entry: define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) { ; CHECK-LABEL: @matrix_extract_insert_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 -; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[EXITCOND15_NOT:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: br i1 [[EXITCOND15_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[I]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp 
ult i64 [[INDVARS_IV]], 225 +; CHECK-NEXT: [[INDVARS_IV16_US:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INDVARS_IV_NEXT_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDVARS_IV16_US]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[INDVARS_IV16_US]] ; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV16_US]] ; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]] ; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_US]] = add nuw nsw i64 [[INDVARS_IV16_US]], 1 +; CHECK-NEXT: [[EXITCOND_US_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_US_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[CONV6]], 15 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 15 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[I]], 210 ; CHECK-NEXT: tail call void 
@llvm.assume(i1 [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP5]] ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] ; CHECK: for.body4.us.1: -; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY4_US_1]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV_1]], 15 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i64 [[INDVARS_IV_1]], 210 +; CHECK-NEXT: [[INDVARS_IV16_US_1:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INDVARS_IV_NEXT_US_1:%.*]], [[FOR_BODY4_US_1]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV16_US_1]], 15 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i64 [[INDVARS_IV16_US_1]], 210 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP9]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP8]] ; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, ptr [[TMP10]], align 8 @@ -125,19 +125,19 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]] ; CHECK-NEXT: store double [[SUB_US_1]], ptr [[TMP11]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_1]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_US_1]] = add nuw nsw i64 [[INDVARS_IV16_US_1]], 1 +; CHECK-NEXT: [[EXITCOND_US_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_1]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_US_NOT_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_1]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: -; CHECK-NEXT: 
[[TMP12:%.*]] = add nuw nsw i64 [[CONV6]], 30 +; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 30 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i32 [[I]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP12]] ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] ; CHECK: for.body4.us.2: -; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INDVARS_IV_NEXT_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV_2]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i64 [[INDVARS_IV_2]], 195 +; CHECK-NEXT: [[INDVARS_IV16_US_2:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INDVARS_IV_NEXT_US_2:%.*]], [[FOR_BODY4_US_2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV16_US_2]], 30 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i64 [[INDVARS_IV16_US_2]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP15]] ; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, ptr [[TMP17]], align 8 @@ -147,19 +147,19 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, ptr [[TMP18]], align 8 ; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]] ; CHECK-NEXT: store double [[SUB_US_2]], ptr [[TMP18]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT_2]] = add nuw nsw i64 [[INDVARS_IV_2]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_2]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_US_2]] = add nuw nsw i64 [[INDVARS_IV16_US_2]], 1 +; CHECK-NEXT: [[EXITCOND_US_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_2]], 
[[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_US_NOT_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_2]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: -; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw i64 [[CONV6]], 45 +; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 45 ; CHECK-NEXT: [[TMP20:%.*]] = icmp ult i32 [[I]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP20]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP19]] ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] ; CHECK: for.body4.us.3: -; CHECK-NEXT: [[INDVARS_IV_3:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV_3]], 45 -; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i64 [[INDVARS_IV_3]], 180 +; CHECK-NEXT: [[INDVARS_IV16_US_3:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INDVARS_IV_NEXT_US_3:%.*]], [[FOR_BODY4_US_3]] ] +; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV16_US_3]], 45 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i64 [[INDVARS_IV16_US_3]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP22]] ; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, ptr [[TMP24]], align 8 @@ -169,9 +169,9 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, ptr [[TMP25]], align 8 ; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]] ; CHECK-NEXT: store double [[SUB_US_3]], ptr [[TMP25]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_3]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_COND_CLEANUP]], label 
[[FOR_BODY4_US_3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_US_3]] = add nuw nsw i64 [[INDVARS_IV16_US_3]], 1 +; CHECK-NEXT: [[EXITCOND_US_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_3]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_US_NOT_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_3]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll index a077239e5ffc7..d8579dbcecffa 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll @@ -10,13 +10,13 @@ define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) { ; CHECK-LABEL: @sum_2_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit11.peel: +; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0) ; CHECK-NEXT: [[START_I:%.*]] = load ptr, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC:%.*]], ptr [[A]], i64 0, i32 1 ; CHECK-NEXT: [[END_I:%.*]] = load ptr, ptr [[GEP_END_I]], align 8 ; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint ptr [[START_I]] to i64 ; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint ptr [[END_I]] to i64 ; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]] -; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0) ; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr [[VEC]], ptr [[B:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[START_I1_PEEL:%.*]] = load ptr, ptr [[B]], align 8 ; CHECK-NEXT: [[END_I3_PEEL:%.*]] = load ptr, ptr [[GEP_END_I2]], align 8 @@ -26,8 +26,8 @@ define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) { ; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, ptr [[START_I]], align 8 ; CHECK-NEXT: [[LV_I9_PEEL:%.*]] = load i64, ptr 
[[START_I1_PEEL]], align 8 ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I9_PEEL]] -; CHECK-NEXT: [[EXITCOND_PEEL_NOT:%.*]] = icmp slt i64 [[N]], 1 -; CHECK-NEXT: br i1 [[EXITCOND_PEEL_NOT]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp slt i64 [[N]], 1 +; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I6_PEEL]], i64 [[TMP0]]) @@ -121,6 +121,7 @@ exit: define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) { ; CHECK-LABEL: @sum_3_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit22.peel: +; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0) ; CHECK-NEXT: [[START_I:%.*]] = load ptr, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC:%.*]], ptr [[A]], i64 0, i32 1 ; CHECK-NEXT: [[END_I:%.*]] = load ptr, ptr [[GEP_END_I]], align 8 @@ -128,7 +129,6 @@ define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) { ; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint ptr [[END_I]] to i64 ; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]] ; CHECK-NEXT: [[GEP_END_I13:%.*]] = getelementptr [[VEC]], ptr [[C:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0) ; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr [[VEC]], ptr [[B:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, ptr [[START_I]], align 8 ; CHECK-NEXT: [[START_I1_PEEL:%.*]] = load ptr, ptr [[B]], align 8 @@ -146,8 +146,8 @@ define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) { ; CHECK-NEXT: [[LV_I20_PEEL:%.*]] = load i64, ptr [[START_I12_PEEL]], align 8 ; CHECK-NEXT: [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I9_PEEL]] ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 
[[ADD_2_PEEL]], [[LV_I20_PEEL]] -; CHECK-NEXT: [[EXITCOND_PEEL_NOT:%.*]] = icmp slt i64 [[N]], 1 -; CHECK-NEXT: br i1 [[EXITCOND_PEEL_NOT]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp slt i64 [[N]], 1 +; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I17_PEEL]], i64 [[TMP0]]) diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll index 64e1076da44dd..a5cacf904dc8f 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll @@ -9,11 +9,11 @@ target triple = "thumbv6m-none-none-eabi" define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 { ; CHECK-LABEL: @arm_mean_q7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[BLOCKSIZE:%.*]], -16 +; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16 ; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[BLOCKSIZE]], -16 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP3:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] @@ -25,26 +25,23 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef ; CHECK-NEXT: [[DEC]] = add nsw i32 [[BLKCNT_011]], -1 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_012]], i32 16 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] 
-; CHECK: while.end.loopexit: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP0]] -; CHECK-NEXT: br label [[WHILE_END]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]] ; CHECK: while.end: -; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[WHILE_BODY]] ] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15 -; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[MIDDLE_BLOCK:%.*]] -; CHECK: middle.block: +; CHECK-NEXT: [[CMP2_NOT14:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT14]], label [[WHILE_END5:%.*]], label [[WHILE_BODY3_PREHEADER:%.*]] +; CHECK: while.body3.preheader: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP0]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = tail call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[AND]]) -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[PSRC_ADDR_0_LCSSA]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[SCEVGEP]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[SUM_0_LCSSA]] ; CHECK-NEXT: br label [[WHILE_END5]] ; CHECK: while.end5: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], [[WHILE_END]] 
], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], [[WHILE_END]] ], [ [[TMP7]], [[WHILE_BODY3_PREHEADER]] ] ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUM_1_LCSSA]], [[BLOCKSIZE]] ; CHECK-NEXT: [[CONV6:%.*]] = trunc i32 [[DIV]] to i8 ; CHECK-NEXT: store i8 [[CONV6]], ptr [[PRESULT:%.*]], align 1 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll index 80f96b17c9083..9f13893db7375 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll @@ -168,14 +168,15 @@ exit: define void @test_runtime_trip_count(i32 %N) { ; CHECK-LABEL: @test_runtime_trip_count( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp slt i32 [[N]], 1 +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -200,18 +201,18 @@ define void @test_runtime_trip_count(i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 
[[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[FOR_BODY_PREHEADER7]] ; CHECK: for.body.preheader7: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INDVARS_IV3_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV3_PH]], [[FOR_BODY_PREHEADER7]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV3]] ; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV3]] ; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV3]] ; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX4]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV3]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: diff --git 
a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll index e169f2570cd0e..976dbdc1aa179 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll @@ -21,14 +21,15 @@ target triple = "x86_64-apple-macosx10.15.0" define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) { ; CHECK-LABEL: @loop_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[S:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[S:%.*]], i32 0) +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp slt i32 [[S]], 1 +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967288 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -53,18 +54,18 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER5]] ; CHECK: for.body.preheader5: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INDVARS_IV3_PH:%.*]] = phi i64 [ 0, 
[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER5]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PIN]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV3_PH]], [[FOR_BODY_PREHEADER5]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PIN]], i64 [[INDVARS_IV3]] ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[OR2:%.*]] = mul nuw nsw i32 [[CONV]], 65793 ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[OR2]], -16777216 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[POUT]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[POUT]], i64 [[INDVARS_IV3]] ; CHECK-NEXT: store i32 [[OR3]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV3]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 0fbbafca696c8..9698d7b6b37fa 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -14,19 +14,20 @@ target triple = "x86_64-apple-macosx10.15.0" define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-LABEL: @vdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: 
[[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp slt i32 [[N]], 1 +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X:%.*]] to i64 ; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y:%.*]] to i64 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 16 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] @@ -63,8 +64,8 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9]] ; CHECK: for.body.preheader9: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 +; CHECK-NEXT: [[INDVARS_IV3_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 
[[INDVARS_IV3_PH]], -1 ; CHECK-NEXT: [[TMP19:%.*]] = add nsw i64 [[TMP18]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 7 ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 @@ -73,19 +74,19 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] ; CHECK: for.body.prol: -; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] +; CHECK-NEXT: [[INDVARS_IV3_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV3_PH]], [[FOR_BODY_PROL_PREHEADER]] ] ; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_PROL]] +; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV3_PROL]] ; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP21:%.*]] = fmul fast double [[T0_PROL]], [[TMP20]] -; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_PROL]] +; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV3_PROL]] ; CHECK-NEXT: store double [[TMP21]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV3_PROL]], 1 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: 
for.body.prol.loopexit: -; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[INDVARS_IV3_UNR:%.*]] = phi i64 [ [[INDVARS_IV3_PH]], [[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] ; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP19]], 7 ; CHECK-NEXT: br i1 [[TMP22]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9_NEW:%.*]] ; CHECK: for.body.preheader9.new: @@ -99,55 +100,55 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP30:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV3_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV3]] ; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP31:%.*]] = fmul fast double [[T0]], [[TMP23]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV3]] ; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP32:%.*]] = fmul fast double [[T0_1]], 
[[TMP24]] ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP33:%.*]] = fmul fast double [[T0_2]], [[TMP25]] ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0_3]], [[TMP26]] ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_4]], [[TMP27]] ; CHECK-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]] -; 
CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5 +; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_5]], [[TMP28]] ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6 +; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 6 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_6]], [[TMP29]] ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7 +; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV3]], 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]] ; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP38:%.*]] = fmul fast double [[T0_7]], [[TMP30]] ; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]] ; CHECK-NEXT: store double [[TMP38]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV3]], 8 ; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 
[[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll index e1eac5f804854..d4c3298b1dd0c 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll @@ -345,25 +345,25 @@ define void @monkey(ptr noundef %arr, i32 noundef %len) { ; CHECK-LABEL: define void @monkey ; CHECK-SAME: (ptr nocapture noundef [[ARR:%.*]], i32 noundef [[LEN:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN]], 1 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY4_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body4.preheader: -; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[LEN]], 1 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_COND1_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.cond.cleanup3: -; CHECK-NEXT: [[INC]] = add nuw i32 [[I_09]], 1 +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_08]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY4_PREHEADER]], label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_COND_CLEANUP]] ; CHECK: for.body4: -; CHECK-NEXT: [[K_07:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY4]] ], [ [[I_09]], [[FOR_BODY4_PREHEADER]] ] -; CHECK-NEXT: [[IDX_EXT_I:%.*]] = zext i32 [[K_07]] to i64 +; CHECK-NEXT: [[K_06:%.*]] = phi i32 [ [[I_08]], [[FOR_COND1_PREHEADER]] 
], [ [[DEC:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: [[IDX_EXT_I:%.*]] = zext i32 [[K_06]] to i64 ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDX_EXT_I]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_PTR_I]], align 4 -; CHECK-NEXT: [[DEC]] = add i32 [[K_07]], -1 +; CHECK-NEXT: [[DEC]] = add i32 [[K_06]], -1 ; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll index b5bba73e05f83..20dfe4b6e4e61 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -47,11 +47,11 @@ define void @_Z4loopi(i32 %width) { ; HOIST-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; HOIST-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; HOIST: for.cond.preheader: -; HOIST-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 +; HOIST-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; HOIST-NEXT: br label [[FOR_COND:%.*]] ; HOIST: for.cond: ; HOIST-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] +; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]] ; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; HOIST: for.cond.cleanup: @@ -59,7 +59,7 @@ define void @_Z4loopi(i32 %width) { ; HOIST-NEXT: br label [[RETURN]] ; HOIST: for.body: ; HOIST-NEXT: tail call void @f1() -; HOIST-NEXT: [[INC]] = add nuw i32 [[I_0]], 1 +; HOIST-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; 
HOIST-NEXT: br label [[FOR_COND]] ; HOIST: return: ; HOIST-NEXT: ret void @@ -69,21 +69,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; ROTATE-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE: for.cond.preheader: -; ROTATE-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATE: for.body.preheader: -; ROTATE-NEXT: [[TMP0:%.*]] = add i32 [[WIDTH]], -2 -; ROTATE-NEXT: br label [[FOR_BODY:%.*]] +; ROTATE-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 +; ROTATE-NEXT: [[EXITCOND3_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; ROTATE-NEXT: br i1 [[EXITCOND3_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATE: for.cond.cleanup: ; ROTATE-NEXT: tail call void @f0() ; ROTATE-NEXT: tail call void @f2() ; ROTATE-NEXT: br label [[RETURN]] ; ROTATE: for.body: -; ROTATE-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] ; ROTATE-NEXT: tail call void @f0() ; ROTATE-NEXT: tail call void @f1() -; ROTATE-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_04]], [[TMP0]] +; ROTATE-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; ROTATE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; ROTATE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE: return: ; ROTATE-NEXT: ret void diff --git a/llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll b/llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll index c159d1b686787..d327a5deea1aa 100644 --- a/llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll +++ b/llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll @@ -10,20 +10,15 @@ define void 
@test_remove_check_with_incrementing_integer_induction(i16 %start, i ; CHECK-LABEL: @test_remove_check_with_incrementing_integer_induction( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LEN:%.*]] = zext i8 [[LEN_N:%.*]] to i16 -; CHECK-NEXT: [[LEN_NEG_NOT:%.*]] = icmp ult i16 [[LEN]], [[A:%.*]] -; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[LEN_N]], 0 -; CHECK-NEXT: [[OR_COND3:%.*]] = and i1 [[LEN_NEG_NOT]], [[C1]] -; CHECK-NEXT: br i1 [[OR_COND3]], label [[LOOP_LATCH_PREHEADER:%.*]], label [[EXIT:%.*]] -; CHECK: loop.latch.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[A]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[LEN]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = tail call i16 @llvm.umin.i16(i16 [[TMP0]], i16 [[TMP1]]) -; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] +; CHECK-NEXT: [[LEN_NEG_NOT:%.*]] = icmp uge i16 [[LEN]], [[A:%.*]] +; CHECK-NEXT: [[EXITCOND1_NOT:%.*]] = icmp eq i8 [[LEN_N]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[LEN_NEG_NOT]], [[EXITCOND1_NOT]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ], [ 0, [[LOOP_LATCH_PREHEADER]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: tail call void @use(i16 [[IV2]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV2]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[IV2]], [[UMIN]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[IV_NEXT]], [[LEN]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]] ; CHECK: exit: ; CHECK-NEXT: ret void