diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 65975d22747999..c4f678171d8a88 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1575,9 +1575,45 @@ static void computeKnownBitsFromOperator(const Operator *I,
         RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
 
         Known2 = KnownBits(BitWidth);
+
         // Recurse, but cap the recursion to one level, because we don't
         // want to waste time spinning around in loops.
         computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
+
+        // If this failed, see if we can use a conditional branch into the phi
+        // to help us determine the range of the value.
+        if (Known2.isUnknown()) {
+          ICmpInst::Predicate Pred;
+          const APInt *RHSC;
+          BasicBlock *TrueSucc, *FalseSucc;
+          // TODO: Use RHS Value and compute range from its known bits.
+          if (match(RecQ.CxtI,
+                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
+                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
+            // Check for cases of duplicate successors.
+            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
+              // If we're using the false successor, invert the predicate.
+              if (FalseSucc == P->getParent())
+                Pred = CmpInst::getInversePredicate(Pred);
+
+              switch (Pred) {
+              case CmpInst::Predicate::ICMP_EQ:
+                Known2 = KnownBits::makeConstant(*RHSC);
+                break;
+              case CmpInst::Predicate::ICMP_ULE:
+                Known2.Zero.setHighBits(RHSC->countLeadingZeros());
+                break;
+              case CmpInst::Predicate::ICMP_ULT:
+                Known2.Zero.setHighBits((*RHSC - 1).countLeadingZeros());
+                break;
+              default:
+                // TODO - add additional integer predicate handling.
+                break;
+              }
+            }
+          }
+        }
+
         Known = KnownBits::commonBits(Known, Known2);
         // If all bits have been ruled out, there's no need to check
         // more operands.
diff --git a/llvm/test/Transforms/InstCombine/known-phi-br.ll b/llvm/test/Transforms/InstCombine/known-phi-br.ll
index 82b2ffbe2720bd..64d3344eb20665 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-br.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-br.ll
@@ -6,7 +6,7 @@
 ; the known bits of a phi edge based off a conditional branch feeding the phi.
; -; TODO: %x either eq 7 or is set to 7 +; %x either eq 7 or is set to 7 define i64 @limit_i64_eq_7(i64 %x) { ; CHECK-LABEL: @limit_i64_eq_7( ; CHECK-NEXT: entry: @@ -15,8 +15,7 @@ define i64 @limit_i64_eq_7(i64 %x) { ; CHECK: body: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 7, [[BODY]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 7 ; entry: %cmp = icmp eq i64 %x, 7 @@ -28,20 +27,21 @@ end: ret i64 %res } -; TODO: %x either eq 255 or is set to 255 +; %x either eq 255 or is set to 255 define i64 @limit_i64_ne_255(i64 %x) { ; CHECK-LABEL: @limit_i64_ne_255( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[X:%.*]], 255 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[END:%.*]], label [[BODY:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[X:%.*]], 255 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] ; CHECK: body: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 255, [[BODY]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 255 ; entry: %cmp = icmp ne i64 %x, 255 + call void @use(i1 %cmp) br i1 %cmp, label %body, label %end body: br label %end @@ -49,8 +49,9 @@ end: %res = phi i64 [ %x, %entry ], [ 255, %body ] ret i64 %res } +declare void @use(i1) -; TODO: %x either ule 15 or is masked with 15 +; %x either ule 15 or is masked with 15 define i64 @limit_i64_ule_15(i64 %x) { ; CHECK-LABEL: @limit_i64_ule_15( ; CHECK-NEXT: entry: @@ -61,8 +62,7 @@ define i64 @limit_i64_ule_15(i64 %x) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 15 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp ule i64 %x, 15 @@ -76,7 +76,7 @@ end: ret i64 %res } -; TODO: %x either uge 8 or is masked with 7 +; %x either uge 8 or is masked with 7 define i64 @limit_i64_uge_8(i64 %x) { ; CHECK-LABEL: @limit_i64_uge_8( ; CHECK-NEXT: entry: @@ -87,8 +87,7 @@ define i64 @limit_i64_uge_8(i64 %x) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp uge i64 %x, 8 @@ -102,7 +101,7 @@ end: ret i64 %res } -; TODO: %x either ult 8 or is masked with 7 +; %x either ult 8 or is masked with 7 define i64 @limit_i64_ult_8(i64 %x) { ; CHECK-LABEL: @limit_i64_ult_8( ; CHECK-NEXT: entry: @@ -113,8 +112,7 @@ define i64 @limit_i64_ult_8(i64 %x) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp ult i64 %x, 8 @@ -128,7 +126,7 @@ end: ret i64 %res } -; TODO: %x either ugt 7 or is masked with 7 +; %x either ugt 7 or is masked with 7 define i64 @limit_i64_ugt_7(i64 %x) { ; CHECK-LABEL: @limit_i64_ugt_7( ; CHECK-NEXT: entry: @@ -139,8 +137,7 @@ define i64 @limit_i64_ugt_7(i64 %x) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: 
%cmp = icmp ugt i64 %x, 7 diff --git a/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll index c232e1743d9747..7ced7473454893 100644 --- a/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll +++ b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll @@ -216,8 +216,8 @@ define double @test_multiple_phi_operands(ptr %arr_d, i1 %entry_cond) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] -; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_02]], 999 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret double [[MUL]] @@ -255,8 +255,8 @@ define double @test_multiple_phi_operands_with_non_zero(ptr %arr_d, i1 %entry_co ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] -; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_02]], 999 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret double [[MUL]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll index 2001a7528f5559..70b002f766b753 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll @@ -1,258 +1,49 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+sse2 < %s | FileCheck %s --check-prefixes=SSE -; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+avx2 < %s | FileCheck %s --check-prefixes=AVX +; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+sse2 < %s | FileCheck %s +; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+avx2 < %s | FileCheck %s ; PR38280 / Issue #37628 define void @apply_delta(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %neg_offs, i64 noundef %count) { -; SSE-LABEL: @apply_delta( -; SSE-NEXT: entry: -; SSE-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 -; SSE-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] -; SSE: while.cond3.preheader: -; SSE-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[DST_ADDR_0_LCSSA1:%.*]] = ptrtoint ptr [[DST_ADDR_0_LCSSA]] to i64 -; SSE-NEXT: [[SRC_ADDR_0_LCSSA2:%.*]] = ptrtoint ptr [[SRC_ADDR_0_LCSSA]] to i64 -; SSE-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 -; SSE-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[ITER_CHECK:%.*]] -; SSE: iter.check: -; SSE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 8 -; SSE-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
[[WHILE_BODY4_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SSE: vector.memcheck: -; SSE-NEXT: [[TMP0:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; SSE-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 -; SSE-NEXT: [[TMP1:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; SSE-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP1]], 32 -; SSE-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]] -; SSE-NEXT: [[TMP2:%.*]] = add i64 [[NEG_OFFS:%.*]], 31 -; SSE-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32 -; SSE-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]] -; SSE-NEXT: br i1 [[CONFLICT_RDX5]], label [[WHILE_BODY4_PREHEADER]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; SSE: vector.main.loop.iter.check: -; SSE-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 32 -; SSE-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; SSE: vector.ph: -; SSE-NEXT: [[N_VEC:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -32 -; SSE-NEXT: br label [[VECTOR_BODY:%.*]] -; SSE: vector.body: -; SSE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SSE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX]] -; SSE-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX]] -; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP8]], align 1 -; SSE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i64 16 -; SSE-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 -; SSE-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; SSE-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD11]], [[WIDE_LOAD]] -; SSE-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD12]], [[WIDE_LOAD10]] -; SSE-NEXT: store <16 x i8> [[TMP6]], ptr [[NEXT_GEP]], align 1 -; SSE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 -; SSE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 -; SSE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; SSE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SSE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; SSE: middle.block: -; SSE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC]] -; SSE-NEXT: br i1 [[CMP_N]], label [[WHILE_END9]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; SSE: vec.epilog.iter.check: -; SSE-NEXT: [[IND_END20:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 31 -; SSE-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC]] -; SSE-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC]] -; SSE-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 24 -; SSE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; SSE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER]], label [[VEC_EPILOG_PH]] -; SSE: vec.epilog.ph: -; SSE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; SSE-NEXT: [[N_VEC14:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -8 -; SSE-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC14]] -; SSE-NEXT: 
[[IND_END16:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC14]] -; SSE-NEXT: [[IND_END19:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 7 -; SSE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; SSE: vec.epilog.vector.body: -; SSE-NEXT: [[INDEX23:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; SSE-NEXT: [[NEXT_GEP24:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX23]] -; SSE-NEXT: [[NEXT_GEP25:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX23]] -; SSE-NEXT: [[WIDE_LOAD26:%.*]] = load <8 x i8>, ptr [[NEXT_GEP25]], align 1 -; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP24]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; SSE-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD27]], [[WIDE_LOAD26]] -; SSE-NEXT: store <8 x i8> [[TMP11]], ptr [[NEXT_GEP24]], align 1 -; SSE-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX23]], 8 -; SSE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC14]] -; SSE-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] -; SSE: vec.epilog.middle.block: -; SSE-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC14]] -; SSE-NEXT: br i1 [[CMP_N22]], label [[WHILE_END9]], label [[WHILE_BODY4_PREHEADER]] -; SSE: while.body4.preheader: -; SSE-NEXT: [[DST_ADDR_130_PH:%.*]] = phi ptr [ [[DST_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[DST_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: [[SRC_ADDR_129_PH:%.*]] = phi ptr [ [[SRC_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[SRC_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: [[COUNT_ADDR_128_PH:%.*]] = phi i64 [ [[COUNT_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[COUNT_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: br label [[WHILE_BODY4:%.*]] -; SSE: while.body: -; SSE-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] -; SSE-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] -; SSE-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] -; SSE-NEXT: [[TMP13:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 -; SSE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[TMP14:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 -; SSE-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP14]], [[TMP13]] -; SSE-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 -; SSE-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 -; SSE-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 -; SSE-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 -; SSE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 -; SSE-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] -; SSE: while.body4: -; SSE-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_130_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_129_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 
[ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_128_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 -; SSE-NEXT: [[TMP15:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 -; SSE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; SSE-NEXT: [[ADD6:%.*]] = add i8 [[TMP16]], [[TMP15]] -; SSE-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 -; SSE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 -; SSE-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 -; SSE-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; SSE-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP4:![0-9]+]] -; SSE: while.end9: -; SSE-NEXT: ret void -; -; AVX-LABEL: @apply_delta( -; AVX-NEXT: entry: -; AVX-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 -; AVX-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] -; AVX: while.cond3.preheader: -; AVX-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[DST_ADDR_0_LCSSA1:%.*]] = ptrtoint ptr [[DST_ADDR_0_LCSSA]] to i64 -; AVX-NEXT: [[SRC_ADDR_0_LCSSA2:%.*]] = ptrtoint ptr [[SRC_ADDR_0_LCSSA]] to i64 -; AVX-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 -; AVX-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[ITER_CHECK:%.*]] -; AVX: iter.check: -; AVX-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 16 -; AVX-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; AVX: vector.memcheck: -; AVX-NEXT: [[TMP0:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; AVX-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 -; AVX-NEXT: [[TMP1:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; AVX-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP1]], 128 -; AVX-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]] -; AVX-NEXT: [[TMP2:%.*]] = add i64 [[NEG_OFFS:%.*]], 127 -; AVX-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 128 -; AVX-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]] -; AVX-NEXT: br i1 [[CONFLICT_RDX5]], label [[WHILE_BODY4_PREHEADER]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; AVX: vector.main.loop.iter.check: -; AVX-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 128 -; AVX-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; AVX: vector.ph: -; AVX-NEXT: [[N_VEC:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -128 -; AVX-NEXT: br label [[VECTOR_BODY:%.*]] -; AVX: vector.body: -; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX]] -; AVX-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX]] -; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP10]], align 1 -; AVX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 32 -; AVX-NEXT: [[WIDE_LOAD14:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 -; AVX-NEXT: [[TMP4:%.*]] 
= getelementptr i8, ptr [[NEXT_GEP10]], i64 64 -; AVX-NEXT: [[WIDE_LOAD15:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 -; AVX-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 96 -; AVX-NEXT: [[WIDE_LOAD16:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 -; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[WIDE_LOAD17:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1 -; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32 -; AVX-NEXT: [[WIDE_LOAD18:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 -; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 64 -; AVX-NEXT: [[WIDE_LOAD19:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1 -; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 96 -; AVX-NEXT: [[WIDE_LOAD20:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1 -; AVX-NEXT: [[TMP10:%.*]] = add <32 x i8> [[WIDE_LOAD17]], [[WIDE_LOAD]] -; AVX-NEXT: [[TMP11:%.*]] = add <32 x i8> [[WIDE_LOAD18]], [[WIDE_LOAD14]] -; AVX-NEXT: [[TMP12:%.*]] = add <32 x i8> [[WIDE_LOAD19]], [[WIDE_LOAD15]] -; AVX-NEXT: [[TMP13:%.*]] = add <32 x i8> [[WIDE_LOAD20]], [[WIDE_LOAD16]] -; AVX-NEXT: store <32 x i8> [[TMP10]], ptr [[NEXT_GEP]], align 1 -; AVX-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 -; AVX-NEXT: store <32 x i8> [[TMP11]], ptr [[TMP14]], align 1 -; AVX-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64 -; AVX-NEXT: store <32 x i8> [[TMP12]], ptr [[TMP15]], align 1 -; AVX-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 -; AVX-NEXT: store <32 x i8> [[TMP13]], ptr [[TMP16]], align 1 -; AVX-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 -; AVX-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AVX-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; AVX: middle.block: -; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC]] -; AVX-NEXT: br i1 [[CMP_N]], label [[WHILE_END9]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; AVX: vec.epilog.iter.check: -; AVX-NEXT: [[IND_END28:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 127 -; AVX-NEXT: [[IND_END25:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC]] -; AVX-NEXT: [[IND_END23:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC]] -; AVX-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 112 -; AVX-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; AVX-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER]], label [[VEC_EPILOG_PH]] -; AVX: vec.epilog.ph: -; AVX-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; AVX-NEXT: [[N_VEC22:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -16 -; AVX-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC22]] -; AVX-NEXT: [[IND_END24:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC22]] -; AVX-NEXT: [[IND_END27:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 15 -; AVX-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; AVX: vec.epilog.vector.body: -; AVX-NEXT: [[INDEX31:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT36:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; AVX-NEXT: [[NEXT_GEP32:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX31]] -; AVX-NEXT: [[NEXT_GEP33:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX31]] -; AVX-NEXT: [[WIDE_LOAD34:%.*]] = load <16 x i8>, 
ptr [[NEXT_GEP33]], align 1 -; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP32]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[WIDE_LOAD35:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1 -; AVX-NEXT: [[TMP19:%.*]] = add <16 x i8> [[WIDE_LOAD35]], [[WIDE_LOAD34]] -; AVX-NEXT: store <16 x i8> [[TMP19]], ptr [[NEXT_GEP32]], align 1 -; AVX-NEXT: [[INDEX_NEXT36]] = add nuw i64 [[INDEX31]], 16 -; AVX-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT36]], [[N_VEC22]] -; AVX-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] -; AVX: vec.epilog.middle.block: -; AVX-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC22]] -; AVX-NEXT: br i1 [[CMP_N30]], label [[WHILE_END9]], label [[WHILE_BODY4_PREHEADER]] -; AVX: while.body4.preheader: -; AVX-NEXT: [[DST_ADDR_130_PH:%.*]] = phi ptr [ [[DST_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[DST_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: [[SRC_ADDR_129_PH:%.*]] = phi ptr [ [[SRC_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[SRC_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END25]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: [[COUNT_ADDR_128_PH:%.*]] = phi i64 [ [[COUNT_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[COUNT_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END28]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: br label [[WHILE_BODY4:%.*]] -; AVX: while.body: -; AVX-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] -; AVX-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] -; AVX-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] -; AVX-NEXT: [[TMP21:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 -; AVX-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[TMP22:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 -; AVX-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP22]], [[TMP21]] -; AVX-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 -; AVX-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 -; AVX-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 -; AVX-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 -; AVX-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 -; AVX-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] -; AVX: while.body4: -; AVX-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_130_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_129_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_128_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 -; AVX-NEXT: [[TMP23:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 -; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[TMP24:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AVX-NEXT: [[ADD6:%.*]] = add i8 [[TMP24]], [[TMP23]] -; AVX-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 -; AVX-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 -; AVX-NEXT: [[INCDEC_PTR8]] = 
getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 -; AVX-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; AVX-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP4:![0-9]+]] -; AVX: while.end9: -; AVX-NEXT: ret void +; CHECK-LABEL: @apply_delta( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 +; CHECK-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] +; CHECK: while.cond3.preheader: +; CHECK-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[WHILE_BODY4:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] +; CHECK-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP1]], [[TMP0]] +; CHECK-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 +; CHECK-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 +; CHECK-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 +; CHECK-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 +; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] +; CHECK: while.body4: +; CHECK-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ADD6:%.*]] = add i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: while.end9: +; CHECK-NEXT: ret void ; entry: %cmp21 = icmp ugt i64 %count, 7