diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index bd57d1192eb94..68aea6b0e99c8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15169,6 +15169,32 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N, ScalarEvolution &SE) { return; } + if (auto *WrapPred = dyn_cast(N)) { + const SCEVAddRecExpr *AddRecToCheck = WrapPred->getExpr(); + const SCEV *ExitCount = + SE.getSymbolicMaxBackedgeTakenCount(AddRecToCheck->getLoop()); + if (!isa(ExitCount) && + WrapPred->getFlags() == SCEVWrapPredicate::IncrementNUSW) { + const SCEV *Step = AddRecToCheck->getStepRecurrence(SE); + unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); + unsigned DstBits = SE.getTypeSizeInBits(AddRecToCheck->getType()); + + // AddRecs starting at zero with positive steps won't wrap if (Step * + // trunc ExitCount) does not wrap. + if (AddRecToCheck->getStart()->isZero() && SE.isKnownPositive(Step) && + DstBits < SrcBits && + ExitCount == + SE.getZeroExtendExpr( + SE.getTruncateExpr(ExitCount, AddRecToCheck->getType()), + ExitCount->getType()) && + SE.willNotOverflow( + Instruction::Mul, /*Signed=*/false, Step, + SE.getTruncateExpr(ExitCount, AddRecToCheck->getType()))) { + return; + } + } + } + // Only add predicate if it is not already implied by this union predicate. if (implies(N, SE)) return; diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 510bbe73d7f78..f89a68bd0dafe 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -353,31 +353,49 @@ exit: define void @can_prove_scev_predicate_is_always_true(ptr %dst) { ; CHECK-LABEL: define void @can_prove_scev_predicate_is_always_true( ; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: call void @llvm.masked.store.v64i32.p0(<64 x i32> , ptr [[DST]], i32 4, <64 x i1> ) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] ; CHECK-NEXT: store i32 [[IV]], ptr [[GEP]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = and i32 [[IV]], 65535 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], 4 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[CONV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; ; AUTOVF-LABEL: define void @can_prove_scev_predicate_is_always_true( ; AUTOVF-SAME: ptr [[DST:%.*]]) #[[ATTR2]] { -; AUTOVF-NEXT: [[ENTRY:.*]]: +; AUTOVF-NEXT: [[ENTRY:.*:]] +; AUTOVF-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; AUTOVF: [[VECTOR_PH]]: +; AUTOVF-NEXT: br label %[[VECTOR_BODY:.*]] +; AUTOVF: [[VECTOR_BODY]]: +; AUTOVF-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[DST]], i32 4, <8 x i1> ) +; AUTOVF-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; AUTOVF: [[MIDDLE_BLOCK]]: +; AUTOVF-NEXT: br label %[[EXIT:.*]] +; AUTOVF: [[SCALAR_PH]]: ; AUTOVF-NEXT: br label %[[LOOP:.*]] ; AUTOVF: [[LOOP]]: -; AUTOVF-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; AUTOVF-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; AUTOVF-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] ; AUTOVF-NEXT: store i32 [[IV]], ptr [[GEP]], align 4 ; AUTOVF-NEXT: [[CONV:%.*]] = and i32 [[IV]], 65535 ; AUTOVF-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], 4 ; AUTOVF-NEXT: [[ADD]] = add nuw nsw i32 [[CONV]], 1 -; AUTOVF-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; AUTOVF-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP8:![0-9]+]] ; AUTOVF: [[EXIT]]: ; AUTOVF-NEXT: ret void ; @@ -428,7 +446,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 -4608 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -438,7 +456,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 -72 ; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -472,7 +490,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; AUTOVF-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 -576 ; AUTOVF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; AUTOVF-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; AUTOVF-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: ; AUTOVF-NEXT: br label %[[EXIT:.*]] ; AUTOVF: [[SCALAR_PH]]: @@ -482,7 +500,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 -72 ; AUTOVF-NEXT: store ptr null, ptr [[PTR_IV]], align 8 ; AUTOVF-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; AUTOVF-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; AUTOVF-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; AUTOVF: [[EXIT]]: ; AUTOVF-NEXT: ret void ;