diff --git a/clang/test/CodeGen/ubsan-pointer-overflow.c b/clang/test/CodeGen/ubsan-pointer-overflow.c index e2b5e498cf0b86..0293762c3b8866 100644 --- a/clang/test/CodeGen/ubsan-pointer-overflow.c +++ b/clang/test/CodeGen/ubsan-pointer-overflow.c @@ -85,7 +85,6 @@ void struct_index(struct S1 *p) { // CHECK: getelementptr inbounds %struct.S1, %struct.S1* [[P:%.*]], i64 10 // CHECK-NEXT: [[BASE:%.*]] = ptrtoint %struct.S1* [[P]] to i64, !nosanitize // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 240, !nosanitize - // CHECK: select // CHECK: @__ubsan_handle_pointer_overflow{{.*}} i64 [[BASE]], i64 [[COMPGEP]]) {{.*}}, !nosanitize // CHECK-NOT: @__ubsan_handle_pointer_overflow diff --git a/clang/test/CodeGenCXX/pr27030.cpp b/clang/test/CodeGenCXX/pr27030.cpp index ce83c897989182..03fe1d59d822bd 100644 --- a/clang/test/CodeGenCXX/pr27030.cpp +++ b/clang/test/CodeGenCXX/pr27030.cpp @@ -11,6 +11,5 @@ void test1() { (int A::*)(a); } // CHECK: br i1 %[[memptr_cmp]] // CHECK: %[[adj:.*]] = sub nsw i32 %[[load]], 0 -// CHECK: %[[nv_adj:.*]] = select i1 true, i32 %[[adj]], i32 0 -// CHECK: %[[memptr_converted:.*]] = phi i32 [ -1, {{.*}} ], [ %[[nv_adj]], {{.*}} ] +// CHECK: %[[memptr_converted:.*]] = phi i32 [ -1, {{.*}} ], [ %[[adj]], {{.*}} ] diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 0f4945bad5ab53..326c1e466f9de7 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -973,10 +973,14 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, const Twine &Name, Instruction *MDFrom) { - if (auto *CC = dyn_cast(C)) + if (auto *CC = dyn_cast(C)) + return CC->isOne() ? True : False; + + if (auto *CC = dyn_cast(C)) { if (auto *TC = dyn_cast(True)) if (auto *FC = dyn_cast(False)) return Insert(Folder.CreateSelect(CC, TC, FC), Name); + } SelectInst *Sel = SelectInst::Create(C, True, False); if (MDFrom) { diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index acc4b7e1381189..9cdf846e30d381 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -360,7 +360,6 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.body2 ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin -; CHECK: %loop.inner2.begin ; CHECK: %loop.body3 ; CHECK: %loop.inner1.begin ; CHECK: %bail diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll index 5dd6c28a34c8f1..77d6b005f92147 100644 --- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -21,23 +21,21 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8* [[TMP12]], [[A5]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8* [[TMP11]], [[A5]] +; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; CHECK: for.body.lver.orig: @@ -170,23 +168,21 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) -; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) +; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8* [[TMP11]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) +; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; CHECK: for.body.lver.orig: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index aac7e375171667..699c1077d7ae07 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -83,33 +83,32 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 2, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 true, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP8]], -1 -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i32 [[TMP7]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> , [[REVERSE]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP18]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> , [[REVERSE]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -121,8 +120,8 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], -1 ; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP20]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP19]] ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]] ; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index cdbc01c8a136f4..6a53d96d9ef18a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -281,34 +281,33 @@ define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[N]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP0]], [[N]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i1 [[TMP2]], i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 false, [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 [[N]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 -3 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[N]], [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -320,11 +319,11 @@ define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias ; CHECK: for.body: ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[N]], [[I_09]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[SUB]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index d734763802f536..4c378f0fa4a30a 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -545,12 +545,11 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] ; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 +; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vector.main.loop.iter.check: ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -560,23 +559,24 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; VF-TWO-CHECK: vector.body: ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 -; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4 -; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8 -; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12 -; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 20 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 24 -; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 28 +; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 4 +; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 8 +; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 12 +; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 16 +; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 20 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 24 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 28 ; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0 -; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4 -; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 8 -; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 12 -; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 16 -; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20 -; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24 -; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28 +; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], 0 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 4 +; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 8 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 12 +; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 16 +; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 20 +; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 24 +; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 28 +; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = xor i32 [[TMP19]], -1 ; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP20]], -1 ; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP21]], -1 ; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = xor i32 [[TMP22]], -1 @@ -584,7 +584,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1 ; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1 ; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1 -; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1 +; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP27]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP28]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[N]] @@ -592,7 +592,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]] -; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = sext i32 [[TMP35]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = sext i32 [[TMP36]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP37]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = sext i32 [[TMP38]] to i64 @@ -600,98 +600,97 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64 -; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64 -; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP44]] +; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP43]] +; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP44]] ; VF-TWO-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP45]] ; VF-TWO-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP46]] ; VF-TWO-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP47]] ; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]] ; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]] ; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]] -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]] -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[TMP60]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP61]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP62]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP59]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -4 -; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP63]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -4 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP62]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = bitcast float* [[TMP63]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP64]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -8 -; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP66]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = bitcast float* [[TMP67]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP68]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -8 +; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP65]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -12 -; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP69]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -12 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP68]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP69]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP70]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -16 -; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP72]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = bitcast float* [[TMP73]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP74]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -16 +; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP71]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -20 -; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP75]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -20 +; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP74]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = bitcast float* [[TMP75]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP76]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -24 -; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP78]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = bitcast float* [[TMP79]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP80]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -24 +; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP77]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -28 -; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP81]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -28 +; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP80]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = bitcast float* [[TMP81]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP82]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE15]], -; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]] +; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = fadd fast <4 x float> [[REVERSE]], +; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE3]], +; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE5]], +; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE7]], +; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE9]], +; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE11]], +; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE13]], +; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP11]] +; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]] ; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP14]] ; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]] ; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]] ; VF-TWO-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]] -; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]] -; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = bitcast float* [[TMP100]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP101]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = bitcast float* [[TMP102]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP103]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP105]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP107]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP109]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP111]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP113]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 28 -; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP115]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = bitcast float* [[TMP99]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP83]], <4 x float>* [[TMP100]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 4 +; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = bitcast float* [[TMP101]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP102]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 8 +; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = bitcast float* [[TMP103]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP104]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 12 +; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = bitcast float* [[TMP105]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP106]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 16 +; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = bitcast float* [[TMP107]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP108]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 20 +; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = bitcast float* [[TMP109]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP110]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 24 +; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = bitcast float* [[TMP111]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP112]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = bitcast float* [[TMP113]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP114]], align 4 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP115]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF-TWO-CHECK: middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -708,26 +707,26 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; VF-TWO-CHECK: vec.epilog.vector.body: ; VF-TWO-CHECK-NEXT: [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = add i64 [[INDEX18]], 0 +; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = add i64 [[INDEX18]], 0 ; VF-TWO-CHECK-NEXT: [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32 -; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0 -; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = xor i32 [[TMP118]], -1 -; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], [[N]] -; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = sext i32 [[TMP120]] to i64 -; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP121]] -; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP123]], i32 -1 -; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <2 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x float>, <2 x float>* [[TMP125]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = add i32 [[OFFSET_IDX23]], 0 +; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = xor i32 [[TMP117]], -1 +; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = sext i32 [[TMP119]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP120]] +; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP121]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 -1 +; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = bitcast float* [[TMP123]] to <2 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x float>, <2 x float>* [[TMP124]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE25:%.*]] = shufflevector <2 x float> [[WIDE_LOAD24]], <2 x float> poison, <2 x i32> -; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = fadd fast <2 x float> [[REVERSE25]], -; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP117]] -; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <2 x float>* -; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP126]], <2 x float>* [[TMP129]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = fadd fast <2 x float> [[REVERSE25]], +; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP116]] +; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[TMP126]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = bitcast float* [[TMP127]] to <2 x float>* +; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP125]], <2 x float>* [[TMP128]], align 4 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX18]], 2 -; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP129]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N22]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -738,12 +737,12 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL20]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = xor i32 [[I_014]], -1 -; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP131]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = xor i32 [[I_014]], -1 +; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP130]], [[N]] ; VF-TWO-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]] -; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP132]], 1.000000e+00 +; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP131]], 1.000000e+00 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VF-TWO-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -776,12 +775,11 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] ; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 +; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-FOUR-CHECK: vector.main.loop.iter.check: ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -791,23 +789,24 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; VF-FOUR-CHECK: vector.body: ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 -; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4 -; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8 -; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12 -; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 -; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 20 -; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 24 -; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 28 +; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 4 +; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 8 +; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 12 +; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 16 +; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 20 +; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 24 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 28 ; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0 -; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4 -; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 8 -; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 12 -; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 16 -; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20 -; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24 -; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28 +; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 4 +; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 8 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 12 +; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 16 +; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 20 +; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 24 +; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 28 +; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = xor i32 [[TMP19]], -1 ; VF-FOUR-CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP20]], -1 ; VF-FOUR-CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP21]], -1 ; VF-FOUR-CHECK-NEXT: [[TMP30:%.*]] = xor i32 [[TMP22]], -1 @@ -815,7 +814,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1 ; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1 ; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1 -; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1 +; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP27]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP28]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[N]] @@ -823,7 +822,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]] -; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]] +; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = sext i32 [[TMP35]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = sext i32 [[TMP36]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP37]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = sext i32 [[TMP38]] to i64 @@ -831,98 +830,97 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64 -; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64 -; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP44]] +; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP43]] +; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP44]] ; VF-FOUR-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP45]] ; VF-FOUR-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP46]] ; VF-FOUR-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP47]] ; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]] ; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]] ; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]] -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]] -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[TMP60]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP61]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP62]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP59]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -4 -; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP63]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -4 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP62]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = bitcast float* [[TMP63]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP64]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -8 -; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP66]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = bitcast float* [[TMP67]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP68]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -8 +; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP65]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -12 -; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP69]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -12 +; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP68]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP69]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP70]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -16 -; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP72]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = bitcast float* [[TMP73]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP74]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -16 +; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP71]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -20 -; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP75]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -20 +; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP74]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = bitcast float* [[TMP75]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP76]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -24 -; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP78]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = bitcast float* [[TMP79]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP80]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -24 +; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP77]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -28 -; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP81]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP51]], i32 -28 +; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP80]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = bitcast float* [[TMP81]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP82]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE15]], -; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]] +; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = fadd fast <4 x float> [[REVERSE]], +; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE3]], +; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE5]], +; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE7]], +; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE9]], +; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE11]], +; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE13]], +; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP11]] +; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]] ; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP14]] ; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]] ; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]] ; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]] -; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]] -; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = bitcast float* [[TMP100]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP101]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = bitcast float* [[TMP102]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP103]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP105]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP107]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP109]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP111]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP113]], align 4 -; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 28 -; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP115]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = bitcast float* [[TMP99]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP83]], <4 x float>* [[TMP100]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 4 +; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = bitcast float* [[TMP101]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP102]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 8 +; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = bitcast float* [[TMP103]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP104]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 12 +; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = bitcast float* [[TMP105]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP106]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 16 +; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = bitcast float* [[TMP107]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP108]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 20 +; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = bitcast float* [[TMP109]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP110]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 24 +; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = bitcast float* [[TMP111]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP112]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = getelementptr inbounds float, float* [[TMP91]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = bitcast float* [[TMP113]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP114]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP115]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF-FOUR-CHECK: middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -939,26 +937,26 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; VF-FOUR-CHECK: vec.epilog.vector.body: ; VF-FOUR-CHECK-NEXT: [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = add i64 [[INDEX18]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = add i64 [[INDEX18]], 0 ; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32 -; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0 -; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = xor i32 [[TMP118]], -1 -; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], [[N]] -; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = sext i32 [[TMP120]] to i64 -; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP121]] -; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP123]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP125]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = add i32 [[OFFSET_IDX23]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = xor i32 [[TMP117]], -1 +; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], [[N]] +; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = sext i32 [[TMP119]] to i64 +; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP120]] +; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP121]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = bitcast float* [[TMP123]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP124]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x float> [[WIDE_LOAD24]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = fadd fast <4 x float> [[REVERSE25]], -; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP117]] -; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP126]], <4 x float>* [[TMP129]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = fadd fast <4 x float> [[REVERSE25]], +; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP116]] +; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[TMP126]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = bitcast float* [[TMP127]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP125]], <4 x float>* [[TMP128]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX18]], 4 -; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP129]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N22]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -969,12 +967,12 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK: for.body: ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VF-FOUR-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL20]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = xor i32 [[I_014]], -1 -; VF-FOUR-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP131]], [[N]] +; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = xor i32 [[I_014]], -1 +; VF-FOUR-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP130]], [[N]] ; VF-FOUR-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]] -; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; VF-FOUR-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP132]], 1.000000e+00 +; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; VF-FOUR-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP131]], 1.000000e+00 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VF-FOUR-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 52dd2565871e9a..35870d44d7c0a0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -66,39 +66,36 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP8]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] -; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[ADD_US]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[ADD_US]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP20]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 +; CHECK-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 +; CHECK-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 +; CHECK-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 ; CHECK-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 -; CHECK-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 -; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 -; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index c3c613bec944d4..80d0d80750130f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -56,44 +56,43 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255 -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP19:%.*]] = or i1 false, [[TMP18]] -; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i32 [[TMP9]], 255 +; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 false, [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]] ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP20]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], -4 -; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP19]], [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], -4 +; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[TMP21]], -1 ; CHECK-NEXT: [[TMP26:%.*]] = add i8 [[TMP22]], -1 -; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1 +; CHECK-NEXT: [[TMP27:%.*]] = zext i8 [[TMP25]] to i32 ; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP26]] to i32 -; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]] -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] @@ -104,7 +103,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index cad57883f4176a..a4776351981df9 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -652,36 +652,35 @@ define void @sink_dominance(i32* %ptr, i32 %N) { ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <4 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4 +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = icmp slt <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> [[TMP13]], <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP12]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP12]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP11]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP11]], i32 2 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -745,38 +744,37 @@ define void @sink_dominance_2(i32* %ptr, i32 %N) { ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP15:%.*]] = mul <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP13]], <4 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP12]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP12]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP11]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP11]], i32 2 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 5b249f0dc24c95..f874358c00dce0 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -175,12 +175,11 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -190,26 +189,26 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], -1 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[N]] -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 -3 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP20]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = xor i32 [[TMP12]], -1 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[N]] +; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 -3 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP19]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = fadd fast <4 x float> [[REVERSE]], -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[TMP24]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <4 x float> [[REVERSE]], +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP20]], <4 x float>* [[TMP23]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -226,26 +225,26 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX4]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[OFFSET_IDX9:%.*]] = trunc i64 [[INDEX4]] to i32 -; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX9]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP27]], -1 -; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], [[N]] -; CHECK-NEXT: [[TMP30:%.*]] = sext i32 [[TMP29]] to i64 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP31]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP32]], i32 -3 -; CHECK-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP34]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX9]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = xor i32 [[TMP26]], -1 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], [[N]] +; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP31]], i32 -3 +; CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4 ; CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP26]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 0 -; CHECK-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP35]], <4 x float>* [[TMP38]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = fadd fast <4 x float> [[REVERSE11]], +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP35]], i32 0 +; CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP34]], <4 x float>* [[TMP37]], align 4 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP39]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP38]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -256,12 +255,12 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP40:%.*]] = xor i32 [[I_014]], -1 -; CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP40]], [[N]] +; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[I_014]], -1 +; CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP39]], [[N]] ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP41:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP41]], 1.000000e+00 +; CHECK-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP40]], 1.000000e+00 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 229274b2888e98..f4692d38c09938 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -31,12 +31,11 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255 -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 false, [[TMP13]] -; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP4]], 255 +; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] @@ -47,18 +46,18 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i8> [[TMP15]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i1> [[TMP18]] to <4 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP21]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8> [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i8> [[TMP14]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i1> [[TMP17]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP18]], <4 x i8>* [[TMP20]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index 0a1b4b7870dcf2..8797882a8433e1 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -15,44 +15,42 @@ define void @test(float* %A, i32 %x) { ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP0]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP0]], [[TMP5]] ; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 undef) ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 1, [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 1, [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP9]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP8]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 false, i1 [[TMP10]], i1 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]] -; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 1, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i32 1, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW3]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP6]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[TMP15]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[X]] -; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], [[X]] -; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP27]], i32 0 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], <4 x float>* [[TMP29]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP13]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[X]] +; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP20]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], [[X]] +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], <4 x float>* [[TMP27]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index da8163df973cac..16d192f7e31089 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -27,19 +27,18 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] -; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP11]], 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP12]] ; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]] @@ -51,20 +50,20 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = urem i32 [[TMP14]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP18]], align 4, !alias.scope !0 -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP14]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = urem i32 [[TMP13]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 4, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -122,19 +121,18 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] -; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP11]], 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP12]] ; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]] @@ -146,20 +144,20 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = urem i32 [[TMP14]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP18]], align 4, !alias.scope !8, !noalias !11 -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = urem i32 [[TMP13]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 4, !alias.scope !11 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -296,30 +294,29 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] -; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = urem i32 [[TMP11]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP16]], <2 x i32>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = urem i32 [[TMP10]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP15]], <2 x i32>* [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll index 84a6b67548d144..844cafd8be94f7 100644 --- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll +++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll @@ -38,23 +38,21 @@ define void @f1(i16* noalias %a, ; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] ; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; LV-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] -; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] -; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] +; LV-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] +; LV-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] +; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP10]] +; LV-NEXT: [[TMP13:%.*]] = icmp ugt i8* [[TMP12]], [[A5]] +; LV-NEXT: [[TMP14:%.*]] = icmp ult i8* [[TMP11]], [[A5]] +; LV-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]] +; LV-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; LV: for.body.lver.orig: @@ -165,28 +163,26 @@ define void @f2(i16* noalias %a, ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]] ; LV-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; LV-NEXT: [[TMP12:%.*]] = trunc i64 [[N]] to i31 -; LV-NEXT: [[TMP13:%.*]] = zext i31 [[TMP12]] to i64 -; LV-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1 -; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP14]] +; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; LV-NEXT: [[TMP11:%.*]] = trunc i64 [[N]] to i31 +; LV-NEXT: [[TMP12:%.*]] = zext i31 [[TMP11]] to i64 +; LV-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1 +; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP13]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* -; LV-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]] -; LV-NEXT: [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]] -; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]] -; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]] -; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP11]], [[TMP21]] -; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] +; LV-NEXT: [[TMP14:%.*]] = sub i64 0, [[MUL_RESULT3]] +; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] +; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP14]] +; LV-NEXT: [[TMP17:%.*]] = icmp ugt i8* [[TMP16]], [[SCEVGEP5]] +; LV-NEXT: [[TMP18:%.*]] = icmp ult i8* [[TMP15]], [[SCEVGEP5]] +; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]] +; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; LV: for.body.lver.orig: @@ -281,23 +277,21 @@ define void @f3(i16* noalias %a, ; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] ; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; LV-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 -; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] -; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] +; LV-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] -; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] -; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] +; LV-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] +; LV-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] +; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP10]] +; LV-NEXT: [[TMP13:%.*]] = icmp ugt i8* [[TMP12]], [[A5]] +; LV-NEXT: [[TMP14:%.*]] = icmp ult i8* [[TMP11]], [[A5]] +; LV-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]] +; LV-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; LV: for.body.lver.orig: @@ -384,26 +378,24 @@ define void @f4(i16* noalias %a, ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]] ; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 -; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] +; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; LV-NEXT: [[TMP11:%.*]] = sext i32 [[TMP1]] to i64 +; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP11]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* -; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]] -; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]] -; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] -; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] -; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]] -; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] +; LV-NEXT: [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT3]] +; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] +; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP12]] +; LV-NEXT: [[TMP15:%.*]] = icmp ugt i8* [[TMP14]], [[SCEVGEP5]] +; LV-NEXT: [[TMP16:%.*]] = icmp ult i8* [[TMP13]], [[SCEVGEP5]] +; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; LV: for.body.lver.orig: @@ -498,26 +490,24 @@ define void @f5(i16* noalias %a, ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]] ; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] -; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 -; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] +; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] +; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] +; LV-NEXT: [[TMP11:%.*]] = sext i32 [[TMP1]] to i64 +; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP11]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* -; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]] -; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]] -; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] -; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] -; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]] -; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] +; LV-NEXT: [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT3]] +; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] +; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP12]] +; LV-NEXT: [[TMP15:%.*]] = icmp ugt i8* [[TMP14]], [[SCEVGEP5]] +; LV-NEXT: [[TMP16:%.*]] = icmp ult i8* [[TMP13]], [[SCEVGEP5]] +; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; LV: for.body.lver.orig: