From 88fc2082e23282146d30640995fb2aec701fe793 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Mon, 15 Sep 2025 23:59:35 -0400 Subject: [PATCH 01/10] [SimplifyCFG] Hoist out implied conditions from successor In some cases a successor can have conditions that imply the current branching condition. If we instead evaluate these implied conditions before the current branching condition, this will result in a more fine-tuned branching condition. We skip this optimization if the true path branch has side effects. Fixes #155986 Validity of updated testcases ----------------------------- 1) SimplifyCFG/pr55765.ll -> https://alive2.llvm.org/ce/z/N_LZoE 2) SimplifyCFG/fold-branch-to-common-dest.ll -> https://alive2.llvm.org/ce/z/7-LL0Y 3) LoopVectorize/float-induction.ll -> https://alive2.llvm.org/ce/z/vuMNEJ --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 44 ++++ .../LoopVectorize/float-induction.ll | 196 +++--------------- .../SimplifyCFG/fold-branch-to-common-dest.ll | 7 +- .../SimplifyCFG/hoist-implied-condition.ll | 78 +++++++ llvm/test/Transforms/SimplifyCFG/pr55765.ll | 11 +- 5 files changed, 160 insertions(+), 176 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index a1f759dd1df83..e8fe7cd951015 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1853,6 +1853,47 @@ static void hoistConditionalLoadsStores( } } +static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL) { + if (!isa<ICmpInst>(BI->getCondition())) + return false; + + ICmpInst *branchCond = cast<ICmpInst>(BI->getCondition()); + BasicBlock *truePathBB = BI->getSuccessor(0); + + for (auto &I : *truePathBB) + if (I.mayHaveSideEffects()) + return false; + + for (auto &I : *truePathBB) { + if (isa<ICmpInst>(I)) { + ICmpInst *impliedICmp = cast<ICmpInst>(&I); + if (impliedICmp->getPredicate() == 
branchCond->getPredicate() && + impliedICmp->getOperand(0) == branchCond->getOperand(0) && + impliedICmp->getOperand(1) == branchCond->getOperand(1)) { + // found the same condition, so we can skip processing this. + continue; + } + + std::optional<bool> Imp = isImpliedCondition(impliedICmp, branchCond, DL); + if (Imp == true) { + Builder.SetInsertPoint(BI); + Value *newBranchCond = Builder.CreateICmp(impliedICmp->getPredicate(), + impliedICmp->getOperand(0), + impliedICmp->getOperand(1)); + + branchCond->replaceAllUsesWith(newBranchCond); + branchCond->eraseFromParent(); + impliedICmp->replaceAllUsesWith( + ConstantInt::getTrue(truePathBB->getContext())); + return true; + } + } + } + + return false; +} + static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI) { // Not handle volatile or atomic. @@ -8121,6 +8162,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (simplifyBranchOnICmpChain(BI, Builder, DL)) return true; + if (hoistImplyingConditions(BI, Builder, DL)) + return requestResimplify(); + // If this basic block has dominating predecessor blocks and the dominating // blocks' conditions imply BI's condition, we know the direction of BI. 
std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 8a3cad0681013..67cac13db4c96 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -184,50 +184,23 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_fast_FMF( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = 
shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] -; VEC2_INTERL1_PRED_STORE: middle.block: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; 
VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -431,50 +404,23 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: 
[[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ 
[[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; VEC2_INTERL1_PRED_STORE: middle.block: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 
[[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -665,43 +611,22 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.preheader: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] -; 
VEC2_INTERL1_PRED_STORE: vector.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; VEC2_INTERL1_PRED_STORE: middle.block: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; 
VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -989,71 +914,30 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp eq i32 [[N:%.*]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC2_INTERL1_PRED_STORE-NEXT: 
[[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP10]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] -; VEC2_INTERL1_PRED_STORE: middle.block: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 
0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: 
[[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1246,40 +1130,22 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.preheader: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] -; VEC2_INTERL1_PRED_STORE: vector.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x 
float> [[VEC_IND]], ptr [[TMP2]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] -; VEC2_INTERL1_PRED_STORE: middle.block: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 
[[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1594,7 +1460,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE: pred.store.continue3: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -1612,7 +1478,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1722,7 +1588,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> poison, ptr [[TMP0]], align 8 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; 
VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: exit: ; VEC2_INTERL1_PRED_STORE-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index 7b88ec338cf5e..fd0f807975595 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -1030,10 +1030,11 @@ define void @pr49510() { ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @global_pr49510, align 1 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[TMP0]], 0 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP0]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[TOBOOL]], [[CMP]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LAND_RHS:%.*]], label [[FOR_END:%.*]] +; CHECK: land.rhs: +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i16 [[TMP0]], 0 +; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll new file mode 100644 index 0000000000000..ad5fe7ca32ca8 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +define i32 @src(ptr %contents.0, i64 %contents.1) { +; CHECK-LABEL: define i32 @src( +; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: br i1 
[[TMP0]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], true +; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; +start: + %cmp1 = icmp ugt i64 %contents.1, 7 + br i1 %cmp1, label %if, label %exit + +if: + %load = load i64, ptr %contents.0 + %cmp2 = icmp eq i64 %load, 123 + %cmp3 = icmp eq i64 %contents.1, 16 + %and = and i1 %cmp2, %cmp3 + br i1 %and, label %if2, label %exit + +if2: + ret i32 1 + +exit: + ret i32 0 +} + +define i32 @src-sideeffects(ptr %contents.0, i64 %contents.1) { +; CHECK-LABEL: define i32 @src-sideeffects( +; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[CONTENTS_1]], 7 +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], [[CMP3]] +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LOAD]], [[CONTENTS_1]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; +start: + %cmp1 = icmp ugt i64 %contents.1, 7 + br i1 %cmp1, label %if, label %exit + +if: + %load = load 
i64, ptr %contents.0 + %cmp2 = icmp eq i64 %load, 123 + %cmp3 = icmp eq i64 %contents.1, 16 + %and = and i1 %cmp2, %cmp3 + %add = add i64 %load, %contents.1 + store i64 %add, ptr %contents.0 + br i1 %and, label %if2, label %exit + +if2: + ret i32 1 + +exit: + ret i32 0 +} diff --git a/llvm/test/Transforms/SimplifyCFG/pr55765.ll b/llvm/test/Transforms/SimplifyCFG/pr55765.ll index 7167d0d445ade..5fbfcc1feda47 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr55765.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr55765.ll @@ -10,21 +10,16 @@ define i32 @main(i1 %c1, i1 %c2, i32 %y) { ; CHECK-LABEL: @main( ; CHECK-NEXT: br i1 [[C1:%.*]], label [[EXIT:%.*]], label [[LOOP_PRE_PREHEADER:%.*]] ; CHECK: loop.pre.preheader: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Y:%.*]], -1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[EXIT]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: br i1 [[C1]], label [[LOOP2:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT]] ; CHECK: loop2: -; CHECK-NEXT: br i1 [[CMP2]], label [[JOIN:%.*]], label [[IF:%.*]] -; CHECK: if: -; CHECK-NEXT: call void @dummy() -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: ; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP2]], label [[LOOP_LATCH]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 From 80a57b818a591106e23019fdc4a34adedbdcb1f0 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 16:38:22 -0400 Subject: [PATCH 02/10] Reimplement based on feedback --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 128 ++++++++++++++++------ 1 file changed, 97 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp 
b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index e8fe7cd951015..9c51c43133964 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1853,45 +1853,111 @@ static void hoistConditionalLoadsStores( } } +static std::optional +visitConditions(Value *V, const Value *baseCond, const BasicBlock *contextBB, + SmallVectorImpl &impliedConditions, + const DataLayout &DL) { + if (!isa(V)) + return std::nullopt; + + Instruction *I = cast(V); + // we only care about conditions in the same basic block + if (contextBB != I->getParent()) + return std::nullopt; + + std::optional Imp = isImpliedCondition(V, baseCond, DL); + // TODO: Handle negated condition case. + if (Imp != true) + return std::nullopt; + + std::optional LHS = visitConditions(I->getOperand(0), baseCond, + contextBB, impliedConditions, DL); + std::optional RHS = visitConditions(I->getOperand(1), baseCond, + contextBB, impliedConditions, DL); + + if (!LHS.has_value() && !RHS.has_value()) { + impliedConditions.push_back(I); + } + + return Imp; +} + static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, const DataLayout &DL) { - if (!isa(BI->getCondition())) + // Only look for CFG like + // A -> B, C + // B -> D, C + // or + // A -> B, C + // B -> C, D + // TODO: Handle the false branch case as well. + BasicBlock *parentTrueBB = BI->getSuccessor(0); + BasicBlock *parentFalseBB = BI->getSuccessor(1); + + if (!isa(parentTrueBB->getTerminator())) return false; - ICmpInst *branchCond = cast(BI->getCondition()); - BasicBlock *truePathBB = BI->getSuccessor(0); + BranchInst *childBI = cast(parentTrueBB->getTerminator()); + // TODO: Handle the unconditional branch case. 
+ if (childBI->isUnconditional()) + return false; - for (auto &I : *truePathBB) - if (I.mayHaveSideEffects()) - return false; + BasicBlock *childTrueBB = childBI->getSuccessor(0); + BasicBlock *childFalseBB = childBI->getSuccessor(1); + if (parentFalseBB != childTrueBB && parentFalseBB != childFalseBB) + return false; - for (auto &I : *truePathBB) { - if (isa(I)) { - ICmpInst *impliedICmp = cast(&I); - if (impliedICmp->getPredicate() == branchCond->getPredicate() && - impliedICmp->getOperand(0) == branchCond->getOperand(0) && - impliedICmp->getOperand(1) == branchCond->getOperand(1)) { - // found the same condition, so we can skip processing this. - continue; - } + // Avoid cases that have loops for simplicity. + if (childTrueBB == BI->getParent() || childFalseBB == BI->getParent()) + return false; - std::optional Imp = isImpliedCondition(impliedICmp, branchCond, DL); - if (Imp == true) { - Builder.SetInsertPoint(BI); - Value *newBranchCond = Builder.CreateICmp(impliedICmp->getPredicate(), - impliedICmp->getOperand(0), - impliedICmp->getOperand(1)); - - branchCond->replaceAllUsesWith(newBranchCond); - branchCond->eraseFromParent(); - impliedICmp->replaceAllUsesWith( - ConstantInt::getTrue(truePathBB->getContext())); - return true; - } - } - } + auto NoSideEffects = [](BasicBlock &BB) { + return llvm::none_of(BB, [](const Instruction &I) { + return I.mayWriteToMemory() || I.mayHaveSideEffects(); + }); + }; + // If the basic blocks have side effects, don't hoist conditions. + if (!NoSideEffects(*parentTrueBB) || !NoSideEffects(*parentFalseBB)) + return false; - return false; + bool isCommonBBonTruePath = (parentFalseBB == childTrueBB); + // Check if parent branch condition is implied by the child branch + // condition. If so, we can hoist the child branch condition to the + // parent branch. 
For example: + // Parent branch condition: x > y + // Child branch condition: x == z (given z > y) + // We can hoist x == z to the parent branch and eliminate x > y + // condition check as x == z is a much stronger branch condition. + // So it will result in the true path being taken less often. + // Now that we know childBI condition implies parent BI condition, + // we need to find out which conditions to hoist out. + SmallVector hoistCandidates; + std::optional Imp = + visitConditions(childBI->getCondition(), BI->getCondition(), + (!isCommonBBonTruePath ? parentTrueBB : parentFalseBB), + hoistCandidates, DL); + // We found no implication relationship. + if (!Imp.has_value()) + return false; + + // TODO: Handle negated condition case. + if (Imp == false) + return false; + + // We don't handle multiple hoist candidates for now. + if (hoistCandidates.size() > 1) + return false; + + // We can hoist the condition. + Instruction *parentBranchCond = dyn_cast(BI->getCondition()); + Builder.SetInsertPoint(BI); + Instruction *hoistedCondition = Builder.Insert(hoistCandidates[0]->clone()); + parentBranchCond->replaceAllUsesWith(hoistedCondition); + parentBranchCond->eraseFromParent(); + hoistCandidates[0]->replaceAllUsesWith( + ConstantInt::getTrue(parentTrueBB->getContext())); + + return true; } static bool isSafeCheapLoadStore(const Instruction *I, From cc19af7076d1c0dd13f88dab50cc46d7da194724 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 16:58:18 -0400 Subject: [PATCH 03/10] revert some testcases --- .../Transforms/SimplifyCFG/fold-branch-to-common-dest.ll | 7 +++---- llvm/test/Transforms/SimplifyCFG/pr55765.ll | 7 ++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index fd0f807975595..7b88ec338cf5e 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ 
b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -1030,11 +1030,10 @@ define void @pr49510() { ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @global_pr49510, align 1 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[TMP0]], 0 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[LAND_RHS:%.*]], label [[FOR_END:%.*]] -; CHECK: land.rhs: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i16 [[TMP0]], 0 -; CHECK-NEXT: br label [[FOR_COND]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[TOBOOL]], [[CMP]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SimplifyCFG/pr55765.ll b/llvm/test/Transforms/SimplifyCFG/pr55765.ll index 5fbfcc1feda47..834ca4db5afed 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr55765.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr55765.ll @@ -10,7 +10,7 @@ define i32 @main(i1 %c1, i1 %c2, i32 %y) { ; CHECK-LABEL: @main( ; CHECK-NEXT: br i1 [[C1:%.*]], label [[EXIT:%.*]], label [[LOOP_PRE_PREHEADER:%.*]] ; CHECK: loop.pre.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], -1 ; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[EXIT]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 @@ -20,6 +20,11 @@ define i32 @main(i1 %c1, i1 %c2, i32 %y) { ; CHECK: loop.latch: ; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT]] ; CHECK: loop2: +; CHECK-NEXT: br i1 [[CMP2]], label [[JOIN:%.*]], label [[IF:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: ; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP2]], label [[LOOP_LATCH]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 From ed0a2a8ba679e9ee660459e58cecb6ba1ed8b4b1 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 16:59:39 -0400 Subject: 
[PATCH 04/10] nitpick --- llvm/test/Transforms/SimplifyCFG/pr55765.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/pr55765.ll b/llvm/test/Transforms/SimplifyCFG/pr55765.ll index 834ca4db5afed..7167d0d445ade 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr55765.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr55765.ll @@ -10,15 +10,15 @@ define i32 @main(i1 %c1, i1 %c2, i32 %y) { ; CHECK-LABEL: @main( ; CHECK-NEXT: br i1 [[C1:%.*]], label [[EXIT:%.*]], label [[LOOP_PRE_PREHEADER:%.*]] ; CHECK: loop.pre.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], -1 -; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[EXIT]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Y:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: br i1 [[C1]], label [[LOOP2:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: loop2: ; CHECK-NEXT: br i1 [[CMP2]], label [[JOIN:%.*]], label [[IF:%.*]] ; CHECK: if: From c96605bc8b41000d44e8fbc43ef41891d4d3124a Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 17:03:02 -0400 Subject: [PATCH 05/10] revert LV testcases --- .../LoopVectorize/float-induction.ll | 196 +++++++++++++++--- 1 file changed, 165 insertions(+), 31 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 67cac13db4c96..bfda36a4e0cb1 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -184,23 +184,50 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; ; VEC2_INTERL1_PRED_STORE-LABEL: 
@fp_iv_loop1_fast_FMF( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[BROADCAST_SPLAT]], [[TMP3]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement 
<2 x float> poison, float [[TMP4]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP5]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi 
float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -404,23 +431,50 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc <2 x float> [[BROADCAST_SPLAT2]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[BROADCAST_SPLAT]], [[TMP3]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP5]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] 
= fsub reassoc <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop 
[[LOOP6:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -611,22 +665,43 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.preheader: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT]], ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP3]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; 
VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -914,30 +989,71 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp eq i32 [[N:%.*]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST1:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP0]], [[DOTCAST1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fadd fast float [[INIT:%.*]], [[TMP4]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 
+; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT5]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT3]], [[TMP6]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT6]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND8:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND8]], ptr [[TMP8]], align 4 +; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[VEC_IND8]], [[BROADCAST_SPLAT]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = fadd fast <2 x float> [[TMP10]], [[TMP9]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP11]], ptr [[TMP12]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT9]] = fadd fast <2 x float> [[VEC_IND8]], [[BROADCAST_SPLAT7]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[VECTOR_BODY]] ], [ [[INIT:%.*]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; 
VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label 
[[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1130,22 +1246,40 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; ; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4( ; VEC2_INTERL1_PRED_STORE-NEXT: entry: -; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp eq i32 [[N:%.*]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; VEC2_INTERL1_PRED_STORE: for.body.preheader: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP3]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) +; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[VECTOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: 
for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1460,7 +1594,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE: pred.store.continue3: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -1478,7 +1612,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; @@ -1588,7 +1722,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> poison, ptr [[TMP0]], align 8 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; 
VEC2_INTERL1_PRED_STORE: exit: ; VEC2_INTERL1_PRED_STORE-NEXT: ret i32 0 ; From 2c588fd8068aa7fd401008ec9a37cbcbdd450d76 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 17:05:39 -0400 Subject: [PATCH 06/10] nit --- .../LoopVectorize/float-induction.ll | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index bfda36a4e0cb1..8a3cad0681013 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -195,32 +195,32 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[BROADCAST_SPLAT]], [[TMP3]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement 
<2 x float> poison, float [[TMP4]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP5]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 
[[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] @@ -442,32 +442,32 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 ; 
VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc <2 x float> [[BROADCAST_SPLAT2]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[BROADCAST_SPLAT]], [[TMP3]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> 
zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP5]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP5:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] @@ -675,26 +675,26 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x 
float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP3]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], 
[[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 @@ -1000,47 +1000,47 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST1:%.*]] = uitofp nneg i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP0]], [[DOTCAST1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fadd fast float [[INIT:%.*]], [[TMP4]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT5]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT3]], [[TMP6]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul 
fast float [[TMP0]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT6]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND8:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND8]], ptr [[TMP8]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[VEC_IND8]], [[BROADCAST_SPLAT]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = fadd fast <2 x float> [[TMP10]], [[TMP9]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP11]], ptr [[TMP12]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> 
[[VEC_IND9]], ptr [[TMP6]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP10]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT9]] = fadd fast <2 x float> [[VEC_IND8]], [[BROADCAST_SPLAT7]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: 
[[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] @@ -1256,23 +1256,23 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP3]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 From fb6e1426b51894172555a279d5cea212d648584f Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Wed, 17 Sep 2025 17:15:25 -0400 Subject: [PATCH 07/10] refactor --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 73 
++++++++++--------- .../SimplifyCFG/hoist-implied-condition.ll | 56 +++++--------- 2 files changed, 59 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9c51c43133964..1b71087d53508 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1854,29 +1854,34 @@ static void hoistConditionalLoadsStores( } static std::optional -visitConditions(Value *V, const Value *baseCond, const BasicBlock *contextBB, - SmallVectorImpl &impliedConditions, +visitConditions(Value *V, const Value *BaseCond, const BasicBlock *ContextBB, + SmallVectorImpl &ImpliedConditions, const DataLayout &DL) { - if (!isa(V)) + Instruction *I = dyn_cast(V); + if (!I) return std::nullopt; - Instruction *I = cast(V); // we only care about conditions in the same basic block - if (contextBB != I->getParent()) + if (ContextBB != I->getParent()) + return std::nullopt; + + // isImpliedCondition only handles integer conditions. + if (!I->getType()->isIntOrIntVectorTy(1) || + !BaseCond->getType()->isIntOrIntVectorTy(1)) return std::nullopt; - std::optional Imp = isImpliedCondition(V, baseCond, DL); + std::optional Imp = isImpliedCondition(V, BaseCond, DL); // TODO: Handle negated condition case. 
if (Imp != true) return std::nullopt; - std::optional LHS = visitConditions(I->getOperand(0), baseCond, - contextBB, impliedConditions, DL); - std::optional RHS = visitConditions(I->getOperand(1), baseCond, - contextBB, impliedConditions, DL); + std::optional LHS = visitConditions(I->getOperand(0), BaseCond, + ContextBB, ImpliedConditions, DL); + std::optional RHS = visitConditions(I->getOperand(1), BaseCond, + ContextBB, ImpliedConditions, DL); if (!LHS.has_value() && !RHS.has_value()) { - impliedConditions.push_back(I); + ImpliedConditions.push_back(I); } return Imp; @@ -1891,24 +1896,24 @@ static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, // A -> B, C // B -> C, D // TODO: Handle the false branch case as well. - BasicBlock *parentTrueBB = BI->getSuccessor(0); - BasicBlock *parentFalseBB = BI->getSuccessor(1); + BasicBlock *ParentTrueBB = BI->getSuccessor(0); + BasicBlock *ParentFalseBB = BI->getSuccessor(1); - if (!isa(parentTrueBB->getTerminator())) + BranchInst *ChildBI = dyn_cast(ParentTrueBB->getTerminator()); + if (!ChildBI) return false; - BranchInst *childBI = cast(parentTrueBB->getTerminator()); // TODO: Handle the unconditional branch case. - if (childBI->isUnconditional()) + if (ChildBI->isUnconditional()) return false; - BasicBlock *childTrueBB = childBI->getSuccessor(0); - BasicBlock *childFalseBB = childBI->getSuccessor(1); - if (parentFalseBB != childTrueBB && parentFalseBB != childFalseBB) + BasicBlock *ChildTrueBB = ChildBI->getSuccessor(0); + BasicBlock *ChildFalseBB = ChildBI->getSuccessor(1); + if (ParentFalseBB != ChildTrueBB && ParentFalseBB != ChildFalseBB) return false; // Avoid cases that have loops for simplicity. 
- if (childTrueBB == BI->getParent() || childFalseBB == BI->getParent()) + if (ChildTrueBB == BI->getParent() || ChildFalseBB == BI->getParent()) return false; auto NoSideEffects = [](BasicBlock &BB) { @@ -1917,10 +1922,10 @@ static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, }); }; // If the basic blocks have side effects, don't hoist conditions. - if (!NoSideEffects(*parentTrueBB) || !NoSideEffects(*parentFalseBB)) + if (!NoSideEffects(*ParentTrueBB) || !NoSideEffects(*ParentFalseBB)) return false; - bool isCommonBBonTruePath = (parentFalseBB == childTrueBB); + bool IsCommonBBonTruePath = (ParentFalseBB == ChildTrueBB); // Check if parent branch condition is implied by the child branch // condition. If so, we can hoist the child branch condition to the // parent branch. For example: @@ -1929,13 +1934,13 @@ static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, // We can hoist x == z to the parent branch and eliminate x > y // condition check as x == z is a much stronger branch condition. // So it will result in the true path being taken less often. - // Now that we know childBI condition implies parent BI condition, + // Now that we know ChildBI condition implies parent BI condition, // we need to find out which conditions to hoist out. - SmallVector hoistCandidates; + SmallVector HoistCandidates; std::optional Imp = - visitConditions(childBI->getCondition(), BI->getCondition(), - (!isCommonBBonTruePath ? parentTrueBB : parentFalseBB), - hoistCandidates, DL); + visitConditions(ChildBI->getCondition(), BI->getCondition(), + (!IsCommonBBonTruePath ? ParentTrueBB : ParentFalseBB), + HoistCandidates, DL); // We found no implication relationship. if (!Imp.has_value()) return false; @@ -1945,17 +1950,17 @@ static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, return false; // We don't handle multiple hoist candidates for now. 
- if (hoistCandidates.size() > 1) + if (HoistCandidates.size() > 1) return false; // We can hoist the condition. - Instruction *parentBranchCond = dyn_cast(BI->getCondition()); + Instruction *ParentBranchCond = dyn_cast(BI->getCondition()); Builder.SetInsertPoint(BI); - Instruction *hoistedCondition = Builder.Insert(hoistCandidates[0]->clone()); - parentBranchCond->replaceAllUsesWith(hoistedCondition); - parentBranchCond->eraseFromParent(); - hoistCandidates[0]->replaceAllUsesWith( - ConstantInt::getTrue(parentTrueBB->getContext())); + Instruction *HoistedCondition = Builder.Insert(HoistCandidates[0]->clone()); + ParentBranchCond->replaceAllUsesWith(HoistedCondition); + ParentBranchCond->eraseFromParent(); + HoistCandidates[0]->replaceAllUsesWith( + ConstantInt::getTrue(ParentTrueBB->getContext())); return true; } diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll index ad5fe7ca32ca8..565e2df6f4440 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll @@ -2,23 +2,7 @@ ; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s define i32 @src(ptr %contents.0, i64 %contents.1) { -; CHECK-LABEL: define i32 @src( -; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 -; CHECK-NEXT: br i1 [[TMP0]], label %[[IF:.*]], label %[[EXIT:.*]] -; CHECK: [[IF]]: -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], true -; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] -; CHECK: [[COMMON_RET]]: -; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] -; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] -; CHECK: 
[[EXIT]]: -; CHECK-NEXT: br label %[[COMMON_RET]] -; + start: %cmp1 = icmp ugt i64 %contents.1, 7 br i1 %cmp1, label %if, label %exit @@ -37,26 +21,26 @@ exit: ret i32 0 } +define i32 @src-and(ptr %contents.0, i64 %contents.1) { +start: + %cmp1 = icmp ugt i64 %contents.1, 7 + br i1 %cmp1, label %if, label %exit + +if: + %load = load i64, ptr %contents.0 + %cmp2 = icmp eq i64 %load, 123 + %cmp3 = icmp eq i64 %contents.1, 16 + %and = or i1 %cmp2, %cmp3 + br i1 %and, label %if2, label %exit + +if2: + ret i32 1 + +exit: + ret i32 0 +} + define i32 @src-sideeffects(ptr %contents.0, i64 %contents.1) { -; CHECK-LABEL: define i32 @src-sideeffects( -; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[CONTENTS_1]], 7 -; CHECK-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[EXIT:.*]] -; CHECK: [[IF]]: -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], [[CMP3]] -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LOAD]], [[CONTENTS_1]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[CONTENTS_0]], align 4 -; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] -; CHECK: [[COMMON_RET]]: -; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] -; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: br label %[[COMMON_RET]] -; start: %cmp1 = icmp ugt i64 %contents.1, 7 br i1 %cmp1, label %if, label %exit From f0ca7b97da5acae6c4c0e684c0c43b927289b162 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Thu, 18 Sep 2025 00:42:15 -0400 Subject: [PATCH 08/10] fixed issue with handling or case --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 25 +++------ .../SimplifyCFG/hoist-implied-condition.ll | 54 ++++++++++++++++++- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git 
a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1b71087d53508..fccff0c0e34ed 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1871,16 +1871,14 @@ visitConditions(Value *V, const Value *BaseCond, const BasicBlock *ContextBB, return std::nullopt; std::optional Imp = isImpliedCondition(V, BaseCond, DL); - // TODO: Handle negated condition case. - if (Imp != true) - return std::nullopt; - std::optional LHS = visitConditions(I->getOperand(0), BaseCond, ContextBB, ImpliedConditions, DL); std::optional RHS = visitConditions(I->getOperand(1), BaseCond, ContextBB, ImpliedConditions, DL); - if (!LHS.has_value() && !RHS.has_value()) { + // TODO: Handle negated condition case. + // Leaf condition node that implies the base condition. + if (Imp == true && !LHS.has_value() && !RHS.has_value()) { ImpliedConditions.push_back(I); } @@ -1937,20 +1935,11 @@ static bool hoistImplyingConditions(BranchInst *BI, IRBuilder<> &Builder, // Now that we know ChildBI condition implies parent BI condition, // we need to find out which conditions to hoist out. SmallVector HoistCandidates; - std::optional Imp = - visitConditions(ChildBI->getCondition(), BI->getCondition(), - (!IsCommonBBonTruePath ? ParentTrueBB : ParentFalseBB), - HoistCandidates, DL); - // We found no implication relationship. - if (!Imp.has_value()) - return false; - - // TODO: Handle negated condition case. - if (Imp == false) - return false; - + visitConditions(ChildBI->getCondition(), BI->getCondition(), + (!IsCommonBBonTruePath ? ParentTrueBB : ParentFalseBB), + HoistCandidates, DL); // We don't handle multiple hoist candidates for now. - if (HoistCandidates.size() > 1) + if (HoistCandidates.empty() || HoistCandidates.size() > 2) return false; // We can hoist the condition. 
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll index 565e2df6f4440..37655daacced5 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-implied-condition.ll @@ -2,7 +2,23 @@ ; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s define i32 @src(ptr %contents.0, i64 %contents.1) { - +; CHECK-LABEL: define i32 @src( +; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: br i1 [[TMP0]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], true +; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; start: %cmp1 = icmp ugt i64 %contents.1, 7 br i1 %cmp1, label %if, label %exit @@ -22,6 +38,23 @@ exit: } define i32 @src-and(ptr %contents.0, i64 %contents.1) { +; CHECK-LABEL: define i32 @src-and( +; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: br i1 [[TMP0]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: [[AND:%.*]] = or i1 [[CMP2]], true +; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] +; CHECK: [[COMMON_RET]]: 
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; start: %cmp1 = icmp ugt i64 %contents.1, 7 br i1 %cmp1, label %if, label %exit @@ -41,6 +74,25 @@ exit: } define i32 @src-sideeffects(ptr %contents.0, i64 %contents.1) { +; CHECK-LABEL: define i32 @src-sideeffects( +; CHECK-SAME: ptr [[CONTENTS_0:%.*]], i64 [[CONTENTS_1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[CONTENTS_1]], 7 +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[LOAD]], 123 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[CONTENTS_1]], 16 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP2]], [[CMP3]] +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LOAD]], [[CONTENTS_1]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[CONTENTS_0]], align 4 +; CHECK-NEXT: br i1 [[AND]], label %[[COMMON_RET:.*]], label %[[EXIT]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, %[[EXIT]] ], [ 1, %[[IF]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[COMMON_RET]] +; start: %cmp1 = icmp ugt i64 %contents.1, 7 br i1 %cmp1, label %if, label %exit From 4b778120e50f7221016366ff0cc3fdee67d8c985 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Thu, 18 Sep 2025 01:31:24 -0400 Subject: [PATCH 09/10] fix testcase crashes --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index fccff0c0e34ed..d8adc351904b3 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1857,6 +1857,9 @@ static std::optional visitConditions(Value *V, const Value *BaseCond, const BasicBlock 
*ContextBB, SmallVectorImpl &ImpliedConditions, const DataLayout &DL) { + if (!V) + return std::nullopt; + Instruction *I = dyn_cast(V); if (!I) return std::nullopt; @@ -1873,8 +1876,9 @@ visitConditions(Value *V, const Value *BaseCond, const BasicBlock *ContextBB, std::optional Imp = isImpliedCondition(V, BaseCond, DL); std::optional LHS = visitConditions(I->getOperand(0), BaseCond, ContextBB, ImpliedConditions, DL); - std::optional RHS = visitConditions(I->getOperand(1), BaseCond, - ContextBB, ImpliedConditions, DL); + std::optional RHS = + visitConditions((I->getNumOperands() >= 2 ? I->getOperand(1) : nullptr), + BaseCond, ContextBB, ImpliedConditions, DL); // TODO: Handle negated condition case. // Leaf condition node that implies the base condition. From 5bbe066104d4e308cbb26c3455b74596ad8bcafc Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Thu, 18 Sep 2025 01:49:07 -0400 Subject: [PATCH 10/10] updated tests --- llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll | 2 +- llvm/test/Transforms/SimplifyCFG/switch_create.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll index 336fc5e14d758..eb52d32e40a66 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll @@ -491,7 +491,7 @@ define void @test12() nounwind { ; CHECK: bb55.us.us: ; CHECK-NEXT: [[B:%.*]] = icmp ugt i32 undef, undef ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 undef, undef -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[B]], [[A]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[B]], true ; CHECK-NEXT: br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]] ; CHECK: malformed: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll index 18c4ade46162c..c5582683ef103 100644 --- 
a/llvm/test/Transforms/SimplifyCFG/switch_create.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll @@ -644,7 +644,7 @@ define void @test12() nounwind { ; CHECK: bb55.us.us: ; CHECK-NEXT: [[B:%.*]] = icmp ugt i32 undef, undef ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 undef, undef -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[B]], [[A]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[B]], true ; CHECK-NEXT: br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]] ; CHECK: malformed: ; CHECK-NEXT: ret void