diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 4ba2e15222106..08a22a579a2e3 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1397,8 +1397,7 @@ void AccessAnalysis::processMemAccesses() { static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L) { - // FIXME: This should probably only return true for NUW. - if (AR->getNoWrapFlags(SCEV::NoWrapMask)) + if (AR->getNoWrapFlags(SCEV::FlagNUW)) return true; if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll index adfd19923e921..da1b34d99779d 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll @@ -79,6 +79,8 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) { ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: {(1 + %start),+,8}<%loop> Added Flags: +; CHECK-NEXT: {(4 + %start),+,8}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 1853e551806bc..b4935d62052c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -380,6 +380,14 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-LABEL: @test_reversed_load2_store2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 8184 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[SCEVGEP]], [[B]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 8188 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ugt ptr [[TMP22]], [[SCEVGEP1]] +; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP21]], [[TMP23]] +; CHECK-NEXT: br i1 [[TMP24]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 @@ -391,8 +399,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() @@ -408,7 +416,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[REVERSE1:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = sub nsw [[REVERSE1]], [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP15]], 3 ; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 1, [[TMP16]] @@ -429,7 +437,21 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[A]], i64 [[INDVARS_IV]], i32 0 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[X]], align 4 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LOAD1]], [[TRUNC]] +; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[A]], i64 [[INDVARS_IV]], i32 1 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[Y]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[LOAD2]], [[TRUNC]] +; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[INDVARS_IV]], i32 0 +; CHECK-NEXT: store i32 [[ADD]], ptr [[X5]], align 4 +; CHECK-NEXT: [[Y8:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[INDVARS_IV]], i32 1 +; CHECK-NEXT: store i32 [[SUB]], ptr [[Y8]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] ; entry: br label %for.body @@ -639,6 +661,11 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-LABEL: @load_gap_reverse( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 16376 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P2]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt ptr [[TMP9]], [[SCEVGEP]] +; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 @@ -652,11 +679,11 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[BROADCAST_SPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], [[VEC_IND]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], [[VEC_IND]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], [[VEC_IND]], i32 1 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i64.nxv4p0( [[TMP6]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw [[WIDE_MASKED_GATHER]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) @@ -670,7 +697,10 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I_NEXT]] = add nsw i64 [[I]], -1 +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[I]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 726d98f4d37d3..e8a025365ec7f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -30,6 +30,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP1]], 1024 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALAR_TAIL_FOLDING: vector.scevcheck: +; SCALAR_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP29]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; SCALAR_TAIL_FOLDING: vector.ph: ; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 @@ -46,8 +51,8 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 1 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 @@ -74,7 +79,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: for.body: ; SCALAR_TAIL_FOLDING-NEXT: [[IX_024:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -108,9 +113,14 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; PREDICATED_TAIL_FOLDING-NEXT: entry: +; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; PREDICATED_TAIL_FOLDING: vector.scevcheck: +; PREDICATED_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = icmp ult ptr [[TMP22]], [[SCEVGEP]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: -; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() @@ -126,9 +136,9 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH1]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 1 @@ -158,11 +168,31 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] +; PREDICATED_TAIL_FOLDING-NEXT: [[IX_024:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: if.then: +; PREDICATED_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = zext nneg i32 [[MUL]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP24]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext nneg i32 [[ADD]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP26]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP25]], i8 [[TMP27]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP28:%.*]] = zext nneg i32 [[MUL]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP28]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP29:%.*]] = zext nneg i32 [[ADD]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP29]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_024]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PREDICATED_TAIL_FOLDING: for.end: ; PREDICATED_TAIL_FOLDING-NEXT: ret void ; @@ -222,6 +252,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP1]], 1024 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALAR_TAIL_FOLDING: vector.scevcheck: +; SCALAR_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = icmp ult ptr [[TMP19]], [[SCEVGEP]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP20]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; SCALAR_TAIL_FOLDING: vector.ph: ; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 @@ -238,8 +273,8 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg [[TMP7]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP8]] @@ -257,7 +292,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: for.body: ; SCALAR_TAIL_FOLDING-NEXT: [[IX_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -283,9 +318,14 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias nocapture readnone [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0]] { ; PREDICATED_TAIL_FOLDING-NEXT: entry: +; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; PREDICATED_TAIL_FOLDING: vector.scevcheck: +; PREDICATED_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP17]], [[SCEVGEP]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: -; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() @@ -301,9 +341,9 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH1]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext nneg [[TMP6]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP7]] @@ -324,11 +364,23 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] +; PREDICATED_TAIL_FOLDING-NEXT: [[IX_012:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_012]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = zext nneg i32 [[MUL]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_012]], [[CONV]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: if.then: +; PREDICATED_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = zext nneg i32 [[ADD]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP20]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX3]], align 1 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_012]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; PREDICATED_TAIL_FOLDING: for.end: ; PREDICATED_TAIL_FOLDING-NEXT: ret void ; @@ -384,6 +436,11 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP1]], 1024 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALAR_TAIL_FOLDING: vector.scevcheck: +; SCALAR_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; SCALAR_TAIL_FOLDING: vector.ph: ; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 @@ -402,8 +459,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg [[TMP7]] to @@ -422,7 +479,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: for.body: ; SCALAR_TAIL_FOLDING-NEXT: [[IX_018:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -453,10 +510,15 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias nocapture readnone [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i8 zeroext [[GUARD1:%.*]], i8 zeroext [[GUARD2:%.*]]) local_unnamed_addr #[[ATTR0]] { ; PREDICATED_TAIL_FOLDING-NEXT: entry: +; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD1]] to i32 +; PREDICATED_TAIL_FOLDING-NEXT: [[CONV3:%.*]] = zext i8 [[GUARD2]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; PREDICATED_TAIL_FOLDING: vector.scevcheck: +; PREDICATED_TAIL_FOLDING-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 2047 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = icmp ult ptr [[TMP19]], [[SCEVGEP]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP20]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: -; PREDICATED_TAIL_FOLDING-NEXT: [[CONV3:%.*]] = zext i8 [[GUARD2]] to i32 -; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD1]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() @@ -474,9 +536,9 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH1]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer @@ -499,15 +561,28 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; PREDICATED_TAIL_FOLDING-NEXT: [[IX_018:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_018]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_018]], [[CONV]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; PREDICATED_TAIL_FOLDING: if.then: +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = zext nneg i32 [[MUL]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[IF_END]] ; PREDICATED_TAIL_FOLDING: if.end: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN6:%.*]], label [[FOR_INC:%.*]] +; PREDICATED_TAIL_FOLDING-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[IX_018]], [[CONV3]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[CMP4]], label [[IF_THEN6:%.*]], label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: if.then6: +; PREDICATED_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = zext nneg i32 [[ADD]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP22]] +; PREDICATED_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX7]], align 1 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] ; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_018]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; PREDICATED_TAIL_FOLDING: for.end: ; PREDICATED_TAIL_FOLDING-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 576dc0833fa36..37d74e825ccdb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -6,10 +6,25 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-LABEL: @load_store_factor2_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 1023) +; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[P]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW3]] +; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP14]], [[TMP18]] +; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 @@ -19,30 +34,30 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP11]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 -1 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP12]], [[TMP15]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -195,10 +210,25 @@ exit: define void @load_store_factor2_i64(ptr %p) { ; CHECK-LABEL: @load_store_factor2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 1023) +; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[P]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW3]] +; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP14]], [[TMP18]] +; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 @@ -208,30 +238,30 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 8 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv4i64( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP11]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP14]], i32 -1 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP12]], [[TMP15]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -385,13 +415,40 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-LABEL: @load_store_factor3_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 12, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 12, i64 1023) +; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP1]] +; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 12, i64 1023) +; CHECK-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT6]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT6]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[P]] +; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW7]] +; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP27]], [[TMP26]] +; CHECK-NEXT: br i1 [[TMP28]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> @@ -415,7 +472,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -591,8 +648,36 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP22]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 16 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 24, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 24, i64 1023) +; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP1]] +; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 24, i64 1023) +; CHECK-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 +; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT6]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT6]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[P]] +; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW7]] +; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP26]], [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP36]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 @@ -611,24 +696,24 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = mul [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], [[TMP12]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP13]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP14]], [[TMP13]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[P]], [[TMP15]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP16]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP17]], [[TMP16]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP18:%.*]] = add [[TMP15]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P]], [[TMP18]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP19]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP20]], [[TMP19]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = mul [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], [[TMP10]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP11]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP12]], [[TMP11]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP14]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP15]], [[TMP14]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[P]], [[TMP16]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP18]], [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -636,7 +721,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -825,8 +910,81 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.umax.i64(i64 28, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP42]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 56 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp ult ptr [[TMP44]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P]], i64 48 +; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 +; CHECK-NEXT: [[TMP47:%.*]] = sub i64 0, [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp ult ptr [[TMP48]], [[SCEVGEP1]] +; CHECK-NEXT: [[TMP50:%.*]] = or i1 [[TMP49]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P]], i64 40 +; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 +; CHECK-NEXT: [[TMP51:%.*]] = sub i64 0, [[MUL_RESULT7]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] +; CHECK-NEXT: [[TMP53:%.*]] = icmp ult ptr [[TMP52]], [[SCEVGEP5]] +; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[TMP53]], [[MUL_OVERFLOW8]] +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[P]], i64 32 +; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1 +; CHECK-NEXT: [[TMP55:%.*]] = sub i64 0, [[MUL_RESULT11]] +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp ult ptr [[TMP56]], [[SCEVGEP9]] +; CHECK-NEXT: [[TMP58:%.*]] = or i1 [[TMP57]], [[MUL_OVERFLOW12]] +; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[P]], i64 24 +; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 +; CHECK-NEXT: [[TMP59:%.*]] = sub i64 0, [[MUL_RESULT15]] +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] +; CHECK-NEXT: [[TMP61:%.*]] = icmp ult ptr [[TMP60]], [[SCEVGEP13]] +; CHECK-NEXT: [[TMP62:%.*]] = or i1 [[TMP61]], [[MUL_OVERFLOW16]] +; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[P]], i64 16 +; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 +; CHECK-NEXT: [[TMP63:%.*]] = sub i64 0, [[MUL_RESULT19]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] +; CHECK-NEXT: [[TMP65:%.*]] = icmp ult ptr [[TMP64]], [[SCEVGEP17]] +; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[TMP65]], [[MUL_OVERFLOW20]] +; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 +; CHECK-NEXT: [[TMP67:%.*]] = sub i64 0, [[MUL_RESULT23]] +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] +; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP21]] +; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW24]] +; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 64, i64 1023) +; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 +; CHECK-NEXT: [[TMP71:%.*]] = sub i64 0, [[MUL_RESULT26]] +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT26]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ult ptr [[TMP72]], [[P]] +; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[TMP73]], [[MUL_OVERFLOW27]] +; CHECK-NEXT: [[TMP75:%.*]] = or i1 [[TMP46]], [[TMP50]] +; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[TMP75]], [[TMP54]] +; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP76]], [[TMP58]] +; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP62]] +; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP66]] +; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP70]] +; CHECK-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP74]] +; CHECK-NEXT: br i1 [[TMP41]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 @@ -845,49 +1003,49 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], [[TMP12]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP13]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP14]], [[TMP13]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[P]], [[TMP15]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP16]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP17]], [[TMP16]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP18:%.*]] = add [[TMP15]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P]], [[TMP18]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP19]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP20]], [[TMP19]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP21:%.*]] = add [[TMP18]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[P]], [[TMP21]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP22]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_MASKED_GATHER3]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP23]], [[TMP22]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP24:%.*]] = add [[TMP21]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[P]], [[TMP24]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP25]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP26:%.*]] = add [[WIDE_MASKED_GATHER4]], shufflevector ( insertelement ( poison, i64 5, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP26]], [[TMP25]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP27:%.*]] = add [[TMP24]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[P]], [[TMP27]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP28]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP29:%.*]] = add [[WIDE_MASKED_GATHER5]], shufflevector ( insertelement ( poison, i64 6, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP29]], [[TMP28]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP30:%.*]] = add [[TMP27]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[P]], [[TMP30]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP31]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP32:%.*]] = add [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i64 7, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP32]], [[TMP31]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP33:%.*]] = add [[TMP30]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[P]], [[TMP33]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP34]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP35:%.*]] = add [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP35]], [[TMP34]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], [[TMP10]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP11]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP12]], [[TMP11]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP14]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP15]], [[TMP14]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[P]], [[TMP16]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP18]], [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP19:%.*]] = add [[TMP16]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[P]], [[TMP19]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP20]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP21:%.*]] = add [[WIDE_MASKED_GATHER3]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP21]], [[TMP20]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP22:%.*]] = add [[TMP19]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[P]], [[TMP22]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP23]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP24:%.*]] = add [[WIDE_MASKED_GATHER4]], shufflevector ( insertelement ( poison, i64 5, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP24]], [[TMP23]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP25:%.*]] = add [[TMP22]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[P]], [[TMP25]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP26]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP27:%.*]] = add [[WIDE_MASKED_GATHER5]], shufflevector ( insertelement ( poison, i64 6, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP27]], [[TMP26]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP28:%.*]] = add [[TMP25]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[P]], [[TMP28]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP29]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i64 7, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP30]], [[TMP29]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP31:%.*]] = add [[TMP28]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[P]], [[TMP31]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP32]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP33:%.*]] = add [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP33]], [[TMP32]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP35]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -895,7 +1053,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 6936887cd166c..01f599b9edadd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -8,43 +8,52 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP18]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[P]] +; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] +; CHECK-NEXT: br i1 [[TMP20]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP14]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P]], [[TMP12]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[SCALAR_PH]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -55,7 +64,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -82,8 +91,18 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP15]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 256, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP17]], [[P]] +; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW]] +; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 @@ -103,13 +122,13 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -117,8 +136,8 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -551,13 +570,13 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], [[TMP18]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META8:![0-9]+]] -; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; STRIDED-NEXT: [[TMP16:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META8:![0-9]+]] +; STRIDED-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], [[TMP16]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -772,16 +791,16 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0 ; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP28:%.*]] = add [[DOTSPLAT14]], [[TMP27]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP28]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META15:![0-9]+]] -; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], [[TMP29]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] -; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] +; STRIDED-NEXT: [[TMP25:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP26:%.*]] = add [[DOTSPLAT14]], [[TMP25]] +; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP26]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META15:![0-9]+]] +; STRIDED-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP28]], [[TMP27]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]] ; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 8004563f38165..185738776d04a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -232,6 +232,104 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i64> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP97:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP97]] +; CHECK-NEXT: [[TMP98:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP100:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP102:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 32 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 34 +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 36 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 38 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 40 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 42 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 44 +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 46 +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 48 +; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 50 +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 52 +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX]], 54 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 56 +; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[OFFSET_IDX]], 58 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[OFFSET_IDX]], 60 +; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 62 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[TMP98]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP99]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP100]] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP101]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP102]] +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP55:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP58:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP26]] +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP28]] +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP62:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP30]] +; CHECK-NEXT: [[TMP63:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP32]] +; CHECK-NEXT: [[TMP65:%.*]] = load double, ptr [[TMP33]], align 8 +; CHECK-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP34]], align 8 +; CHECK-NEXT: [[TMP67:%.*]] = load double, ptr [[TMP35]], align 8 +; CHECK-NEXT: [[TMP68:%.*]] = load double, ptr [[TMP36]], align 8 +; CHECK-NEXT: [[TMP69:%.*]] = load double, ptr [[TMP37]], align 8 +; CHECK-NEXT: [[TMP70:%.*]] = load double, ptr [[TMP38]], align 8 +; CHECK-NEXT: [[TMP71:%.*]] = load double, ptr [[TMP39]], align 8 +; CHECK-NEXT: [[TMP72:%.*]] = load double, ptr [[TMP40]], align 8 +; CHECK-NEXT: [[TMP73:%.*]] = load double, ptr [[TMP41]], align 8 +; CHECK-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP42]], align 8 +; CHECK-NEXT: [[TMP75:%.*]] = load double, ptr [[TMP43]], align 8 +; CHECK-NEXT: [[TMP76:%.*]] = load double, ptr [[TMP44]], align 8 +; CHECK-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP45]], align 8 +; CHECK-NEXT: [[TMP78:%.*]] = load double, ptr [[TMP46]], align 8 +; CHECK-NEXT: [[TMP79:%.*]] = load double, ptr [[TMP47]], align 8 +; CHECK-NEXT: [[TMP80:%.*]] = load double, ptr [[TMP48]], align 8 +; CHECK-NEXT: [[TMP81:%.*]] = load double, ptr [[TMP49]], align 8 +; CHECK-NEXT: [[TMP82:%.*]] = load double, ptr [[TMP50]], align 8 +; CHECK-NEXT: [[TMP83:%.*]] = load double, ptr [[TMP51]], align 8 +; CHECK-NEXT: [[TMP84:%.*]] = load double, ptr [[TMP52]], align 8 +; CHECK-NEXT: [[TMP85:%.*]] = load double, ptr [[TMP53]], align 8 +; CHECK-NEXT: [[TMP86:%.*]] = load double, ptr [[TMP54]], align 8 +; CHECK-NEXT: [[TMP87:%.*]] = load double, ptr [[TMP55]], align 8 +; CHECK-NEXT: [[TMP88:%.*]] = load double, ptr [[TMP56]], align 8 +; CHECK-NEXT: [[TMP89:%.*]] = load double, ptr [[TMP57]], align 8 +; CHECK-NEXT: [[TMP90:%.*]] = load double, ptr [[TMP58]], align 8 +; CHECK-NEXT: [[TMP91:%.*]] = load double, ptr [[TMP59]], align 8 +; CHECK-NEXT: [[TMP92:%.*]] = load double, ptr [[TMP60]], align 8 +; CHECK-NEXT: [[TMP93:%.*]] = load double, ptr [[TMP61]], align 8 +; CHECK-NEXT: [[TMP94:%.*]] = load double, ptr [[TMP62]], align 8 +; CHECK-NEXT: [[TMP95:%.*]] = load double, ptr [[TMP63]], align 8 +; CHECK-NEXT: [[TMP96:%.*]] = load double, ptr [[TMP64]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]] ; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> @@ -244,7 +342,8 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15 -; CHECK-NEXT: br label [[SCALAR_PH]] +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i64> [[TMP1]], i32 14 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[REC_START]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] @@ -252,16 +351,16 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[NEG_IV:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[GEP_B]], align 8 ; CHECK-NEXT: [[NEG_IV]] = sub nsw i64 0, [[IV]] ; CHECK-NEXT: store i64 [[NEG_IV]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2 ; CHECK-NEXT: [[EC:%.*]] = icmp ugt i64 [[IV]], 74 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[DOTIN_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ] -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi double [ [[L_B]], [[LOOP]] ] +; CHECK-NEXT: [[DOTIN_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi double [ [[L_B]], [[LOOP]] ], [ [[TMP96]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store double [[DOTLCSSA]], ptr [[C:%.*]], align 8 ; CHECK-NEXT: ret i64 [[DOTIN_LCSSA]] ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 6e83cf612f82b..1c4c73a8df0e9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -12,68 +12,149 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[VEC_IV:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <8 x i64> [[VEC_IV]], ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP15]] +; CHECK-NEXT: store i8 0, ptr [[TMP18]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP21]] +; CHECK-NEXT: store i8 0, ptr [[TMP24]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP29]] +; CHECK-NEXT: store i8 0, ptr [[TMP30]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <8 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP52]] +; CHECK-NEXT: store i8 0, ptr [[TMP14]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP53]] +; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP19]] +; CHECK-NEXT: store i8 0, ptr [[TMP20]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; CHECK: pred.store.if11: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP22]] +; CHECK-NEXT: store i8 0, ptr [[TMP23]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] ; CHECK: pred.store.if13: ; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i32 7 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP25]] +; CHECK-NEXT: store i8 0, ptr [[TMP26]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] ; CHECK: pred.store.continue14: -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 -2 -; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], -; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> , ptr [[TMP14]], i32 1, <48 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP27:%.*]] = add <8 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] +; CHECK: pred.store.if15: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP27]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP12]] +; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.continue16: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] +; CHECK: pred.store.if17: +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP27]], i32 1 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP32]] +; CHECK-NEXT: store i8 0, ptr [[TMP33]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] +; CHECK: pred.store.continue18: +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] +; CHECK: pred.store.if19: +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP27]], i32 2 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP35]] +; CHECK-NEXT: store i8 0, ptr [[TMP36]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] +; CHECK: pred.store.continue20: +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] +; CHECK: pred.store.if21: +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP27]], i32 3 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP38]] +; CHECK-NEXT: store i8 0, ptr [[TMP39]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] +; CHECK: pred.store.continue22: +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; CHECK-NEXT: br i1 [[TMP40]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] +; CHECK: pred.store.if23: +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP27]], i32 4 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP41]] +; CHECK-NEXT: store i8 0, ptr [[TMP42]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] +; CHECK: pred.store.continue24: +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; CHECK-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] +; CHECK: pred.store.if25: +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i64> [[TMP27]], i32 5 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP44]] +; CHECK-NEXT: store i8 0, ptr [[TMP45]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] +; CHECK: pred.store.continue26: +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; CHECK-NEXT: br i1 [[TMP46]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] +; CHECK: pred.store.if27: +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <8 x i64> [[TMP27]], i32 6 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP47]] +; CHECK-NEXT: store i8 0, ptr [[TMP48]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] +; CHECK: pred.store.continue28: +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.if29: +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <8 x i64> [[TMP27]], i32 7 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[TMP50]] +; CHECK-NEXT: store i8 0, ptr [[TMP51]], align 1, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.continue30: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IV]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: @@ -93,7 +174,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index 6b52023cfbcae..ecfd092664319 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -1081,11 +1081,16 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 +; DISABLED_MASKED_STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr [[Q]], i32 2047 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp ult ptr [[TMP166]], [[SCEVGEP]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_BODY:%.*]], label [[ENTRY:%.*]] +; DISABLED_MASKED_STRIDED: vector.ph: ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], @@ -1101,316 +1106,343 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if1: +; DISABLED_MASKED_STRIDED: pred.load.if2: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; DISABLED_MASKED_STRIDED: pred.load.continue2: +; DISABLED_MASKED_STRIDED: pred.load.continue3: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if3: +; DISABLED_MASKED_STRIDED: pred.load.if4: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; DISABLED_MASKED_STRIDED: pred.load.continue4: +; DISABLED_MASKED_STRIDED: pred.load.continue5: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if5: +; DISABLED_MASKED_STRIDED: pred.load.if6: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; DISABLED_MASKED_STRIDED: pred.load.continue6: +; DISABLED_MASKED_STRIDED: pred.load.continue7: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if7: +; DISABLED_MASKED_STRIDED: pred.load.if8: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; DISABLED_MASKED_STRIDED: pred.load.continue8: +; DISABLED_MASKED_STRIDED: pred.load.continue9: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if9: +; DISABLED_MASKED_STRIDED: pred.load.if10: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; DISABLED_MASKED_STRIDED: pred.load.continue10: +; DISABLED_MASKED_STRIDED: pred.load.continue11: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if11: +; DISABLED_MASKED_STRIDED: pred.load.if12: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; DISABLED_MASKED_STRIDED: pred.load.continue12: +; DISABLED_MASKED_STRIDED: pred.load.continue13: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if13: +; DISABLED_MASKED_STRIDED: pred.load.if14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; DISABLED_MASKED_STRIDED: pred.load.continue14: +; DISABLED_MASKED_STRIDED: pred.load.continue15: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if15: +; DISABLED_MASKED_STRIDED: pred.load.if16: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] -; DISABLED_MASKED_STRIDED: pred.load.continue16: +; DISABLED_MASKED_STRIDED: pred.load.continue17: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if17: +; DISABLED_MASKED_STRIDED: pred.load.if18: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] -; DISABLED_MASKED_STRIDED: pred.load.continue18: +; DISABLED_MASKED_STRIDED: pred.load.continue19: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if19: +; DISABLED_MASKED_STRIDED: pred.load.if20: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] -; DISABLED_MASKED_STRIDED: pred.load.continue20: +; DISABLED_MASKED_STRIDED: pred.load.continue21: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if21: +; DISABLED_MASKED_STRIDED: pred.load.if22: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] -; DISABLED_MASKED_STRIDED: pred.load.continue22: +; DISABLED_MASKED_STRIDED: pred.load.continue23: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if23: +; DISABLED_MASKED_STRIDED: pred.load.if24: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] -; DISABLED_MASKED_STRIDED: pred.load.continue24: +; DISABLED_MASKED_STRIDED: pred.load.continue25: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if25: +; DISABLED_MASKED_STRIDED: pred.load.if26: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] -; DISABLED_MASKED_STRIDED: pred.load.continue26: +; DISABLED_MASKED_STRIDED: pred.load.continue27: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if27: +; DISABLED_MASKED_STRIDED: pred.load.if28: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] -; DISABLED_MASKED_STRIDED: pred.load.continue28: +; DISABLED_MASKED_STRIDED: pred.load.continue29: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if29: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]] +; DISABLED_MASKED_STRIDED: pred.load.if30: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] -; DISABLED_MASKED_STRIDED: pred.load.continue30: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE31]] +; DISABLED_MASKED_STRIDED: pred.load.continue31: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP49]], <8 x i8> [[TMP98]]) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP101]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP103]], ptr [[TMP102]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if31: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if32: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP107]], ptr [[TMP106]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] -; DISABLED_MASKED_STRIDED: pred.store.continue32: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE33]] +; DISABLED_MASKED_STRIDED: pred.store.continue33: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if33: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if34: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP111]], ptr [[TMP110]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] -; DISABLED_MASKED_STRIDED: pred.store.continue34: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE35]] +; DISABLED_MASKED_STRIDED: pred.store.continue35: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if35: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if36: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP115]], ptr [[TMP114]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] -; DISABLED_MASKED_STRIDED: pred.store.continue36: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE37]] +; DISABLED_MASKED_STRIDED: pred.store.continue37: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if37: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if38: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP119]], ptr [[TMP118]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] -; DISABLED_MASKED_STRIDED: pred.store.continue38: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE39]] +; DISABLED_MASKED_STRIDED: pred.store.continue39: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if39: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if40: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP123]], ptr [[TMP122]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] -; DISABLED_MASKED_STRIDED: pred.store.continue40: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE41]] +; DISABLED_MASKED_STRIDED: pred.store.continue41: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if41: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if42: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP127]], ptr [[TMP126]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] -; DISABLED_MASKED_STRIDED: pred.store.continue42: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE43]] +; DISABLED_MASKED_STRIDED: pred.store.continue43: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if43: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if44: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP131]], ptr [[TMP130]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] -; DISABLED_MASKED_STRIDED: pred.store.continue44: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE45]] +; DISABLED_MASKED_STRIDED: pred.store.continue45: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if45: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if46: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] -; DISABLED_MASKED_STRIDED: pred.store.continue46: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE47]] +; DISABLED_MASKED_STRIDED: pred.store.continue47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if47: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if48: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] -; DISABLED_MASKED_STRIDED: pred.store.continue48: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE49]] +; DISABLED_MASKED_STRIDED: pred.store.continue49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if49: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if50: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] -; DISABLED_MASKED_STRIDED: pred.store.continue50: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE51]] +; DISABLED_MASKED_STRIDED: pred.store.continue51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if51: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if52: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] -; DISABLED_MASKED_STRIDED: pred.store.continue52: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE53]] +; DISABLED_MASKED_STRIDED: pred.store.continue53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if53: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if54: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] -; DISABLED_MASKED_STRIDED: pred.store.continue54: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE55]] +; DISABLED_MASKED_STRIDED: pred.store.continue55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if55: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if56: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] -; DISABLED_MASKED_STRIDED: pred.store.continue56: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE57]] +; DISABLED_MASKED_STRIDED: pred.store.continue57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if57: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if58: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] -; DISABLED_MASKED_STRIDED: pred.store.continue58: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE59]] +; DISABLED_MASKED_STRIDED: pred.store.continue59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] -; DISABLED_MASKED_STRIDED: pred.store.if59: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED: pred.store.if60: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] -; DISABLED_MASKED_STRIDED: pred.store.continue60: +; DISABLED_MASKED_STRIDED: pred.store.continue61: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; DISABLED_MASKED_STRIDED: for.body: +; DISABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY1:%.*]] ] +; DISABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; DISABLED_MASKED_STRIDED: if.then: +; DISABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP168]], i8 [[TMP169]]) +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]] +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]] +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] +; DISABLED_MASKED_STRIDED: for.inc: +; DISABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_024]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2( ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 +; ENABLED_MASKED_STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Q]], i32 2047 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp ult ptr [[TMP9]], [[SCEVGEP]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_BODY:%.*]], label [[ENTRY:%.*]] +; ENABLED_MASKED_STRIDED: vector.ph: ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 @@ -1422,7 +1454,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = or disjoint i32 [[TMP1]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC1]]) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = sub <8 x i8> zeroinitializer, [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP3]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q]], i32 [[TMP3]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 -1 ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP7]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) @@ -1430,6 +1462,28 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; ENABLED_MASKED_STRIDED: for.body: +; ENABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY1:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: if.then: +; ENABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP11]], i8 [[TMP12]]) +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: for.inc: +; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_024]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1488,11 +1542,16 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_reverse( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 +; DISABLED_MASKED_STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 2049 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr [[Q]], i32 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp ugt ptr [[TMP166]], [[SCEVGEP]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_BODY:%.*]], label [[ENTRY:%.*]] +; DISABLED_MASKED_STRIDED: vector.ph: ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], @@ -1508,316 +1567,343 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if1: +; DISABLED_MASKED_STRIDED: pred.load.if2: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; DISABLED_MASKED_STRIDED: pred.load.continue2: +; DISABLED_MASKED_STRIDED: pred.load.continue3: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if3: +; DISABLED_MASKED_STRIDED: pred.load.if4: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; DISABLED_MASKED_STRIDED: pred.load.continue4: +; DISABLED_MASKED_STRIDED: pred.load.continue5: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if5: +; DISABLED_MASKED_STRIDED: pred.load.if6: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; DISABLED_MASKED_STRIDED: pred.load.continue6: +; DISABLED_MASKED_STRIDED: pred.load.continue7: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if7: +; DISABLED_MASKED_STRIDED: pred.load.if8: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; DISABLED_MASKED_STRIDED: pred.load.continue8: +; DISABLED_MASKED_STRIDED: pred.load.continue9: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if9: +; DISABLED_MASKED_STRIDED: pred.load.if10: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; DISABLED_MASKED_STRIDED: pred.load.continue10: +; DISABLED_MASKED_STRIDED: pred.load.continue11: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if11: +; DISABLED_MASKED_STRIDED: pred.load.if12: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; DISABLED_MASKED_STRIDED: pred.load.continue12: +; DISABLED_MASKED_STRIDED: pred.load.continue13: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if13: +; DISABLED_MASKED_STRIDED: pred.load.if14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; DISABLED_MASKED_STRIDED: pred.load.continue14: +; DISABLED_MASKED_STRIDED: pred.load.continue15: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if15: +; DISABLED_MASKED_STRIDED: pred.load.if16: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] -; DISABLED_MASKED_STRIDED: pred.load.continue16: +; DISABLED_MASKED_STRIDED: pred.load.continue17: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if17: +; DISABLED_MASKED_STRIDED: pred.load.if18: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] -; DISABLED_MASKED_STRIDED: pred.load.continue18: +; DISABLED_MASKED_STRIDED: pred.load.continue19: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if19: +; DISABLED_MASKED_STRIDED: pred.load.if20: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] -; DISABLED_MASKED_STRIDED: pred.load.continue20: +; DISABLED_MASKED_STRIDED: pred.load.continue21: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if21: +; DISABLED_MASKED_STRIDED: pred.load.if22: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] -; DISABLED_MASKED_STRIDED: pred.load.continue22: +; DISABLED_MASKED_STRIDED: pred.load.continue23: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if23: +; DISABLED_MASKED_STRIDED: pred.load.if24: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] -; DISABLED_MASKED_STRIDED: pred.load.continue24: +; DISABLED_MASKED_STRIDED: pred.load.continue25: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if25: +; DISABLED_MASKED_STRIDED: pred.load.if26: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] -; DISABLED_MASKED_STRIDED: pred.load.continue26: +; DISABLED_MASKED_STRIDED: pred.load.continue27: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if27: +; DISABLED_MASKED_STRIDED: pred.load.if28: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] -; DISABLED_MASKED_STRIDED: pred.load.continue28: +; DISABLED_MASKED_STRIDED: pred.load.continue29: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if29: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]] +; DISABLED_MASKED_STRIDED: pred.load.if30: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] -; DISABLED_MASKED_STRIDED: pred.load.continue30: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE31]] +; DISABLED_MASKED_STRIDED: pred.load.continue31: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP49]], <8 x i8> [[TMP98]]) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP101]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP103]], ptr [[TMP102]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if31: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if32: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP107]], ptr [[TMP106]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] -; DISABLED_MASKED_STRIDED: pred.store.continue32: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE33]] +; DISABLED_MASKED_STRIDED: pred.store.continue33: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if33: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if34: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP111]], ptr [[TMP110]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] -; DISABLED_MASKED_STRIDED: pred.store.continue34: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE35]] +; DISABLED_MASKED_STRIDED: pred.store.continue35: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if35: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if36: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP115]], ptr [[TMP114]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] -; DISABLED_MASKED_STRIDED: pred.store.continue36: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE37]] +; DISABLED_MASKED_STRIDED: pred.store.continue37: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if37: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if38: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP119]], ptr [[TMP118]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] -; DISABLED_MASKED_STRIDED: pred.store.continue38: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE39]] +; DISABLED_MASKED_STRIDED: pred.store.continue39: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if39: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if40: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP123]], ptr [[TMP122]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] -; DISABLED_MASKED_STRIDED: pred.store.continue40: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE41]] +; DISABLED_MASKED_STRIDED: pred.store.continue41: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if41: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if42: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP127]], ptr [[TMP126]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] -; DISABLED_MASKED_STRIDED: pred.store.continue42: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE43]] +; DISABLED_MASKED_STRIDED: pred.store.continue43: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if43: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if44: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP131]], ptr [[TMP130]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] -; DISABLED_MASKED_STRIDED: pred.store.continue44: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE45]] +; DISABLED_MASKED_STRIDED: pred.store.continue45: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if45: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if46: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] -; DISABLED_MASKED_STRIDED: pred.store.continue46: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE47]] +; DISABLED_MASKED_STRIDED: pred.store.continue47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if47: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if48: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] -; DISABLED_MASKED_STRIDED: pred.store.continue48: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE49]] +; DISABLED_MASKED_STRIDED: pred.store.continue49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if49: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if50: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] -; DISABLED_MASKED_STRIDED: pred.store.continue50: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE51]] +; DISABLED_MASKED_STRIDED: pred.store.continue51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if51: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if52: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] -; DISABLED_MASKED_STRIDED: pred.store.continue52: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE53]] +; DISABLED_MASKED_STRIDED: pred.store.continue53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if53: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if54: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] -; DISABLED_MASKED_STRIDED: pred.store.continue54: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE55]] +; DISABLED_MASKED_STRIDED: pred.store.continue55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if55: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if56: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] -; DISABLED_MASKED_STRIDED: pred.store.continue56: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE57]] +; DISABLED_MASKED_STRIDED: pred.store.continue57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if57: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] +; DISABLED_MASKED_STRIDED: pred.store.if58: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] -; DISABLED_MASKED_STRIDED: pred.store.continue58: +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE59]] +; DISABLED_MASKED_STRIDED: pred.store.continue59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] -; DISABLED_MASKED_STRIDED: pred.store.if59: +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED: pred.store.if60: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] -; DISABLED_MASKED_STRIDED: pred.store.continue60: +; DISABLED_MASKED_STRIDED: pred.store.continue61: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; DISABLED_MASKED_STRIDED: for.body: +; DISABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[ENTRY1:%.*]] ] +; DISABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; DISABLED_MASKED_STRIDED: if.then: +; DISABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP168]], i8 [[TMP169]]) +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]] +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]] +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] +; DISABLED_MASKED_STRIDED: for.inc: +; DISABLED_MASKED_STRIDED-NEXT: [[INC]] = add nsw i32 [[IX_024]], -1 +; DISABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2_reverse( ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 +; ENABLED_MASKED_STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 2049 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr [[Q]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp ugt ptr [[TMP166]], [[SCEVGEP]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_BODY:%.*]], label [[ENTRY:%.*]] +; ENABLED_MASKED_STRIDED: vector.ph: ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], @@ -1833,305 +1919,327 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if1: +; ENABLED_MASKED_STRIDED: pred.load.if2: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; ENABLED_MASKED_STRIDED: pred.load.continue2: +; ENABLED_MASKED_STRIDED: pred.load.continue3: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if3: +; ENABLED_MASKED_STRIDED: pred.load.if4: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; ENABLED_MASKED_STRIDED: pred.load.continue4: +; ENABLED_MASKED_STRIDED: pred.load.continue5: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if5: +; ENABLED_MASKED_STRIDED: pred.load.if6: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; ENABLED_MASKED_STRIDED: pred.load.continue6: +; ENABLED_MASKED_STRIDED: pred.load.continue7: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if7: +; ENABLED_MASKED_STRIDED: pred.load.if8: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; ENABLED_MASKED_STRIDED: pred.load.continue8: +; ENABLED_MASKED_STRIDED: pred.load.continue9: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if9: +; ENABLED_MASKED_STRIDED: pred.load.if10: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; ENABLED_MASKED_STRIDED: pred.load.continue10: +; ENABLED_MASKED_STRIDED: pred.load.continue11: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if11: +; ENABLED_MASKED_STRIDED: pred.load.if12: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; ENABLED_MASKED_STRIDED: pred.load.continue12: +; ENABLED_MASKED_STRIDED: pred.load.continue13: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if13: +; ENABLED_MASKED_STRIDED: pred.load.if14: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; ENABLED_MASKED_STRIDED: pred.load.continue14: +; ENABLED_MASKED_STRIDED: pred.load.continue15: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if15: +; ENABLED_MASKED_STRIDED: pred.load.if16: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] -; ENABLED_MASKED_STRIDED: pred.load.continue16: +; ENABLED_MASKED_STRIDED: pred.load.continue17: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if17: +; ENABLED_MASKED_STRIDED: pred.load.if18: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] -; ENABLED_MASKED_STRIDED: pred.load.continue18: +; ENABLED_MASKED_STRIDED: pred.load.continue19: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if19: +; ENABLED_MASKED_STRIDED: pred.load.if20: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] -; ENABLED_MASKED_STRIDED: pred.load.continue20: +; ENABLED_MASKED_STRIDED: pred.load.continue21: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if21: +; ENABLED_MASKED_STRIDED: pred.load.if22: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] -; ENABLED_MASKED_STRIDED: pred.load.continue22: +; ENABLED_MASKED_STRIDED: pred.load.continue23: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if23: +; ENABLED_MASKED_STRIDED: pred.load.if24: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] -; ENABLED_MASKED_STRIDED: pred.load.continue24: +; ENABLED_MASKED_STRIDED: pred.load.continue25: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if25: +; ENABLED_MASKED_STRIDED: pred.load.if26: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] -; ENABLED_MASKED_STRIDED: pred.load.continue26: +; ENABLED_MASKED_STRIDED: pred.load.continue27: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if27: +; ENABLED_MASKED_STRIDED: pred.load.if28: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] -; ENABLED_MASKED_STRIDED: pred.load.continue28: +; ENABLED_MASKED_STRIDED: pred.load.continue29: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] -; ENABLED_MASKED_STRIDED: pred.load.if29: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]] +; ENABLED_MASKED_STRIDED: pred.load.if30: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] -; ENABLED_MASKED_STRIDED: pred.load.continue30: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.load.continue31: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP49]], <8 x i8> [[TMP98]]) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP101]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP103]], ptr [[TMP102]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; ENABLED_MASKED_STRIDED: pred.store.continue: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if31: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if32: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP107]], ptr [[TMP106]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] -; ENABLED_MASKED_STRIDED: pred.store.continue32: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE33]] +; ENABLED_MASKED_STRIDED: pred.store.continue33: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if33: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if34: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP111]], ptr [[TMP110]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] -; ENABLED_MASKED_STRIDED: pred.store.continue34: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE35]] +; ENABLED_MASKED_STRIDED: pred.store.continue35: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if35: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if36: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP115]], ptr [[TMP114]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] -; ENABLED_MASKED_STRIDED: pred.store.continue36: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE37]] +; ENABLED_MASKED_STRIDED: pred.store.continue37: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if37: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if38: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP119]], ptr [[TMP118]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] -; ENABLED_MASKED_STRIDED: pred.store.continue38: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE39]] +; ENABLED_MASKED_STRIDED: pred.store.continue39: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if39: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if40: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP123]], ptr [[TMP122]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] -; ENABLED_MASKED_STRIDED: pred.store.continue40: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE41]] +; ENABLED_MASKED_STRIDED: pred.store.continue41: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if41: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if42: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP127]], ptr [[TMP126]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] -; ENABLED_MASKED_STRIDED: pred.store.continue42: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE43]] +; ENABLED_MASKED_STRIDED: pred.store.continue43: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if43: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if44: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP131]], ptr [[TMP130]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] -; ENABLED_MASKED_STRIDED: pred.store.continue44: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE45]] +; ENABLED_MASKED_STRIDED: pred.store.continue45: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if45: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if46: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] -; ENABLED_MASKED_STRIDED: pred.store.continue46: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE47]] +; ENABLED_MASKED_STRIDED: pred.store.continue47: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if47: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if48: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] -; ENABLED_MASKED_STRIDED: pred.store.continue48: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE49]] +; ENABLED_MASKED_STRIDED: pred.store.continue49: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if49: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if50: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] -; ENABLED_MASKED_STRIDED: pred.store.continue50: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE51]] +; ENABLED_MASKED_STRIDED: pred.store.continue51: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if51: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if52: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] -; ENABLED_MASKED_STRIDED: pred.store.continue52: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE53]] +; ENABLED_MASKED_STRIDED: pred.store.continue53: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if53: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if54: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] -; ENABLED_MASKED_STRIDED: pred.store.continue54: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE55]] +; ENABLED_MASKED_STRIDED: pred.store.continue55: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if55: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if56: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] -; ENABLED_MASKED_STRIDED: pred.store.continue56: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE57]] +; ENABLED_MASKED_STRIDED: pred.store.continue57: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] -; ENABLED_MASKED_STRIDED: pred.store.if57: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if58: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] -; ENABLED_MASKED_STRIDED: pred.store.continue58: +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE59]] +; ENABLED_MASKED_STRIDED: pred.store.continue59: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] -; ENABLED_MASKED_STRIDED: pred.store.if59: +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE60]] +; ENABLED_MASKED_STRIDED: pred.store.if60: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] -; ENABLED_MASKED_STRIDED: pred.store.continue60: +; ENABLED_MASKED_STRIDED: pred.store.continue61: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; ENABLED_MASKED_STRIDED: for.body: +; ENABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[ENTRY1:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: if.then: +; ENABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[ADD]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP168]], i8 [[TMP169]]) +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[MUL]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[ADD]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: for.inc: +; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nsw i32 [[IX_024]], -1 +; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2199,334 +2307,29 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] -; DISABLED_MASKED_STRIDED: vector.ph: -; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7 -; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 -; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] -; DISABLED_MASKED_STRIDED: vector.body: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE62]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] -; DISABLED_MASKED_STRIDED: pred.load.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if3: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; DISABLED_MASKED_STRIDED: pred.load.continue4: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if5: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; DISABLED_MASKED_STRIDED: pred.load.continue6: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if7: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; DISABLED_MASKED_STRIDED: pred.load.continue8: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if9: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; DISABLED_MASKED_STRIDED: pred.load.continue10: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if11: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; DISABLED_MASKED_STRIDED: pred.load.continue12: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if13: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; DISABLED_MASKED_STRIDED: pred.load.continue14: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if15: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] -; DISABLED_MASKED_STRIDED: pred.load.continue16: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if17: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP54]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = insertelement <8 x i8> poison, i8 [[TMP56]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] -; DISABLED_MASKED_STRIDED: pred.load.continue18: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE16]] ], [ [[TMP57]], [[PRED_LOAD_IF17]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if19: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP60]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = load i8, ptr [[TMP61]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP62]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] -; DISABLED_MASKED_STRIDED: pred.load.continue20: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = phi <8 x i8> [ [[TMP58]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP63]], [[PRED_LOAD_IF19]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if21: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP66]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP68]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] -; DISABLED_MASKED_STRIDED: pred.load.continue22: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = phi <8 x i8> [ [[TMP64]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP69]], [[PRED_LOAD_IF21]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP71]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if23: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP72]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = insertelement <8 x i8> [[TMP70]], i8 [[TMP74]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] -; DISABLED_MASKED_STRIDED: pred.load.continue24: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = phi <8 x i8> [ [[TMP70]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP75]], [[PRED_LOAD_IF23]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if25: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP78]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = load i8, ptr [[TMP79]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = insertelement <8 x i8> [[TMP76]], i8 [[TMP80]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] -; DISABLED_MASKED_STRIDED: pred.load.continue26: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = phi <8 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP81]], [[PRED_LOAD_IF25]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP83]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if27: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP84]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = load i8, ptr [[TMP85]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = insertelement <8 x i8> [[TMP82]], i8 [[TMP86]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] -; DISABLED_MASKED_STRIDED: pred.load.continue28: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = phi <8 x i8> [ [[TMP82]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP87]], [[PRED_LOAD_IF27]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if29: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP90]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP22]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; DISABLED_MASKED_STRIDED: for.body: +; DISABLED_MASKED_STRIDED-NEXT: [[IX_023:%.*]] = phi i32 [ [[INC:%.*]], [[PRED_LOAD_CONTINUE30:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[IX_023]], [[GUARD:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30]] +; DISABLED_MASKED_STRIDED: if.then: +; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = shl nuw nsw i32 [[IX_023]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP90]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = insertelement <8 x i8> [[TMP88]], i8 [[TMP92]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] -; DISABLED_MASKED_STRIDED: pred.load.continue30: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = phi <8 x i8> [ [[TMP88]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP93]], [[PRED_LOAD_IF29]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP95]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]] -; DISABLED_MASKED_STRIDED: pred.load.if31: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = or disjoint i32 [[TMP90]], 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP96]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = load i8, ptr [[TMP97]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = insertelement <8 x i8> [[TMP94]], i8 [[TMP98]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE32]] -; DISABLED_MASKED_STRIDED: pred.load.continue32: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = phi <8 x i8> [ [[TMP94]], [[PRED_LOAD_CONTINUE30]] ], [ [[TMP99]], [[PRED_LOAD_IF31]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP51]], <8 x i8> [[TMP100]]) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP103]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i8> [[TMP101]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP105]], ptr [[TMP104]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] -; DISABLED_MASKED_STRIDED: pred.store.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if33: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP107]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i8> [[TMP101]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP109]], ptr [[TMP108]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] -; DISABLED_MASKED_STRIDED: pred.store.continue34: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if35: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP111]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i8> [[TMP101]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP113]], ptr [[TMP112]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] -; DISABLED_MASKED_STRIDED: pred.store.continue36: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if37: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP115]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i8> [[TMP101]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP117]], ptr [[TMP116]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] -; DISABLED_MASKED_STRIDED: pred.store.continue38: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if39: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP119]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i8> [[TMP101]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP121]], ptr [[TMP120]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] -; DISABLED_MASKED_STRIDED: pred.store.continue40: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if41: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP123]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i8> [[TMP101]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP125]], ptr [[TMP124]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] -; DISABLED_MASKED_STRIDED: pred.store.continue42: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if43: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP127]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i8> [[TMP101]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP129]], ptr [[TMP128]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] -; DISABLED_MASKED_STRIDED: pred.store.continue44: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if45: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP131]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i8> [[TMP101]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP133]], ptr [[TMP132]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] -; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if47: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP136]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i8> [[TMP134]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP138]], ptr [[TMP137]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] -; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if49: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP140]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i8> [[TMP134]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], ptr [[TMP141]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] -; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if51: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP144]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP145]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] -; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if53: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP148]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP149]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] -; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if55: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP152]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP153]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] -; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if57: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP156]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP157]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] -; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] -; DISABLED_MASKED_STRIDED: pred.store.if59: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP160]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP92]], i8 [[TMP98]]) +; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP90]] ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], ptr [[TMP161]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] -; DISABLED_MASKED_STRIDED: pred.store.continue60: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]] -; DISABLED_MASKED_STRIDED: pred.store.if61: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP164]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = sub i8 0, [[TMP162]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP96]] ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], ptr [[TMP165]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] -; DISABLED_MASKED_STRIDED: pred.store.continue62: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] +; DISABLED_MASKED_STRIDED: for.inc: +; DISABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_023]], 1 +; DISABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2534,38 +2337,28 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] -; ENABLED_MASKED_STRIDED: vector.ph: -; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7 -; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 -; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] -; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> +; ENABLED_MASKED_STRIDED: for.body: +; ENABLED_MASKED_STRIDED-NEXT: [[IX_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[IX_023]], [[GUARD:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: if.then: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[IX_023]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or disjoint i32 [[TMP2]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]]) -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP5]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP5]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP0]], i8 [[TMP1]]) +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX5]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP5]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX9]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] +; ENABLED_MASKED_STRIDED: for.inc: +; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_023]], 1 +; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[VECTOR_PH]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2962,7 +2755,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2998,7 +2791,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP7]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index 86ca1222aa4ea..a1088c722b432 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -111,20 +111,30 @@ for.end: define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK-LABEL: @interleaved_with_cond_store_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3 +; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[N]], -1 +; CHECK-NEXT: [[MUL_RESULT:%.*]] = shl i64 [[TMP8]], 4 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = icmp ugt i64 [[TMP8]], 1152921504606846975 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult ptr [[TMP19]], [[P]] +; CHECK-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW]] +; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX1]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX1]], [[TMP1]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -133,23 +143,20 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i64 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 +; CHECK: pred.store.if2: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP11]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0 +; CHECK: pred.store.continue3: +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP11]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -157,7 +164,7 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 4c3377255b21a..e0da9a91c59e3 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name VAR ; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" @@ -342,11 +342,19 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-LABEL: @test_reversed_load2_store2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 8184 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[SCEVGEP]], [[B]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 8188 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt ptr [[TMP8]], [[SCEVGEP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -24 @@ -357,7 +365,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -28 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> @@ -374,7 +382,21 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[A]], i64 [[INDVARS_IV]], i32 0 +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[X]], align 4 +; CHECK-NEXT: [[VARTMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP]], [[VARTMP1]] +; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[A]], i64 [[INDVARS_IV]], i32 1 +; CHECK-NEXT: [[VARTMP2:%.*]] = load i32, ptr [[Y]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[VARTMP2]], [[VARTMP1]] +; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[INDVARS_IV]], i32 0 +; CHECK-NEXT: store i32 [[ADD]], ptr [[X5]], align 4 +; CHECK-NEXT: [[Y8:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B]], i64 [[INDVARS_IV]], i32 1 +; CHECK-NEXT: store i32 [[SUB]], ptr [[Y8]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] ; entry: br label %for.body @@ -441,8 +463,8 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[VARTMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[VARTMP1]] ; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022 @@ -518,8 +540,8 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[VARTMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[VARTMP1]] ; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]] @@ -568,13 +590,18 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-LABEL: @load_gap_reverse( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 16376 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 8 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ugt ptr [[TMP30]], [[SCEVGEP]] +; CHECK-NEXT: br i1 [[TMP31]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1022, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1021, [[INDEX]] @@ -584,7 +611,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], i64 [[OFFSET_IDX]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP2]], i32 1 @@ -622,7 +649,17 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP32:%.*]] = add nsw i64 [[I]], [[X]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[I]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[I]], i32 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[TMP34]], align 8 +; CHECK-NEXT: [[TMP36:%.*]] = sub nsw i64 [[TMP35]], [[I]] +; CHECK-NEXT: store i64 [[TMP32]], ptr [[TMP33]], align 8 +; CHECK-NEXT: store i64 [[TMP36]], ptr [[TMP34]], align 8 +; CHECK-NEXT: [[I_NEXT]] = add nsw i64 [[I]], -1 +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[I]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll index b721d2184bcc1..f9ec777fe1439 100644 --- a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll +++ b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll @@ -12,17 +12,27 @@ define void @test(ptr %data) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DATA:%.*]], i64 8 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 1023) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW]] +; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH1:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP3]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 @@ -36,7 +46,7 @@ define void @test(ptr %data) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ]