diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
new file mode 100644
index 0000000000000..ba859f2e3eec9
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
+
+define void @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr(ptr %start, ptr %end) {
+; CHECK-LABEL: 'ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr'
+; CHECK-NEXT: Determining loop execution counts for: @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr
+; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
+;
+entry:
+  %end.i = ptrtoint ptr %end to i64
+  %start.i = ptrtoint ptr %start to i64
+  %sub = sub i64 %end.i, %start.i
+  %pre.1 = icmp eq i64 %sub, 4
+  call void @llvm.assume(i1 %pre.1)
+  br label %loop
+
+loop:
+  %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
+  store i32 0, ptr %iv
+  %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 4
+  %ec = icmp eq ptr %iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll
new file mode 100644
index 0000000000000..635126c9262cf
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
+
+define void @max_btc_improved_by_applying_guards_to_add_subexpr(i32 %low, i32 %high) {
+; CHECK-LABEL: 'max_btc_improved_by_applying_guards_to_add_subexpr'
+; CHECK-NEXT: Determining loop execution counts for: @max_btc_improved_by_applying_guards_to_add_subexpr
+; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
+;
+entry:
+  %sub = sub i32 %high, %low
+  %pre.1 = icmp slt i32 %sub, 8
+  br i1 %pre.1, label %if.then, label %exit
+
+if.then:
+  %pre.2 = icmp slt i32 %sub, 0
+  br i1 %pre.2, label %exit, label %ph
+
+ph:
+  %add.1 = add i32 %sub, 1
+  %wide.trip.count = zext i32 %add.1 to i64
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %wide.trip.count
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/miniters.ll b/llvm/test/Transforms/LoopVectorize/miniters.ll
index 0b4c002045186..a0fd48d510f24 100644
--- a/llvm/test/Transforms/LoopVectorize/miniters.ll
+++ b/llvm/test/Transforms/LoopVectorize/miniters.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "vector.ph:" --version 5
 ; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 ; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
 
@@ -8,37 +9,91 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 @a = common global [1000 x i32] zeroinitializer, align 16
 
 ; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
-; CHECK-LABEL: foo(
-; CHECK: %min.iters.check = icmp ult i64 %N, 4
-; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
-; UNROLL-LABEL: foo(
-; UNROLL: %min.iters.check = icmp ult i64 %N, 8
-; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
-
 define void @foo(i64 %N) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
+; CHECK: [[LOOP_PREHEADER]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+;
+; UNROLL-LABEL: define void @foo(
+; UNROLL-SAME: i64 [[N:%.*]]) {
+; UNROLL-NEXT: [[ENTRY:.*:]]
+; UNROLL-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
+; UNROLL-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
+; UNROLL: [[LOOP_PREHEADER]]:
+; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; UNROLL: [[VECTOR_PH]]:
+;
 entry:
-  %cmp.8 = icmp sgt i64 %N, 0
-  br i1 %cmp.8, label %for.body.preheader, label %for.end
-
-for.body.preheader: ; preds = %entry
-  br label %for.body
+  %c = icmp sgt i64 %N, 0
+  br i1 %c, label %loop, label %exit
 
-for.body: ; preds = %for.body, %for.body.preheader
-  %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %i.09
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %iv
   %tmp = load i32, ptr %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %i.09
+  %arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %iv
   %tmp1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %tmp1, %tmp
-  %arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %i.09
+  %arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %iv
   store i32 %add, ptr %arrayidx2, align 4
-  %inc = add nuw nsw i64 %i.09, 1
-  %exitcond = icmp eq i64 %inc, %N
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %N
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @min_iters_known_via_loop_guards_add(i32 %start, i32 %end, ptr %src) {
+; CHECK-LABEL: define void @min_iters_known_via_loop_guards_add(
+; CHECK-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
+; CHECK-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
+; CHECK-NEXT: call void @llvm.assume(i1 [[PRE]])
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
+; CHECK-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+;
+; UNROLL-LABEL: define void @min_iters_known_via_loop_guards_add(
+; UNROLL-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
+; UNROLL-NEXT: [[ENTRY:.*:]]
+; UNROLL-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
+; UNROLL-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
+; UNROLL-NEXT: call void @llvm.assume(i1 [[PRE]])
+; UNROLL-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
+; UNROLL-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
+; UNROLL-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
+; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; UNROLL: [[VECTOR_PH]]:
+;
+entry:
+  %sub = sub i32 %end, %start
+  %pre = icmp sgt i32 %sub, 100
+  call void @llvm.assume(i1 %pre)
+  %add.1 = add i32 %sub, 1
+  %iv.start = zext i32 %add.1 to i64
+  br label %loop
 
-for.end.loopexit: ; preds = %for.body
-  br label %for.end
+loop:
+  %iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr inbounds i64, ptr %src, i64 %iv
+  store i64 %iv, ptr %gep
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 100
+  br i1 %ec, label %exit, label %loop
 
-for.end: ; preds = %for.end.loopexit, %entry
+exit:
   ret void
 }