Skip to content

Commit

Permalink
[NFCI] Regenerate LoopVectorize test checks
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbolvansky committed Apr 3, 2022
1 parent d3684c3 commit a113a58
Show file tree
Hide file tree
Showing 211 changed files with 30,890 additions and 6,209 deletions.
19 changes: 10 additions & 9 deletions llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,29 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce

; Check that we don't fall into an infinite loop.
; @test: a single-block loop with no exit edge. %for.body branches
; unconditionally back to itself, so the function never returns.
define void @test() nounwind {
entry:
  br label %for.body

for.body:
  ; %0 is 1 when arriving from %entry and 0 on every back-edge; it has no users.
  %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
  ; Unconditional back-edge: each basic block must end in exactly one terminator.
  br label %for.body
}



; @test2: a counted loop whose exit condition is undef. The induction variable
; %indvars.iv47 starts at 0 and is incremented by 1 on each iteration; the
; i32 phi %0 is 1 on entry and 0 on the back-edge and has no users.
define void @test2() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
  %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
  %indvars.iv.next48 = add i64 %indvars.iv47, 1
  ; The branch condition is undef, so the trip count cannot be derived from it.
  br i1 undef, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; The exit block is marked unreachable rather than returning.
  unreachable
}

;PR14701
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4

; Check that we don't crash.

Expand Down
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -Oz -S -enable-new-pm=0 < %s | FileCheck %s
; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s

Expand All @@ -10,7 +11,57 @@ target triple = "arm64-apple-ios5.0.0"

define void @foo(float* noalias nocapture %ptrA, float* noalias nocapture readonly %ptrB, i64 %size) {
; CHECK-LABEL: @foo(
; CHECK: fmul <4 x float>
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SIZE]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER6:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SIZE]], -8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[PTRB:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[PTRA:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD5]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[SIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER6]]
; CHECK: for.body.preheader6:
; CHECK-NEXT: [[INDVARS_IV2_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV2_PH]], [[FOR_BODY_PREHEADER6]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTRB]], i64 [[INDVARS_IV2]]
; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[PTRA]], i64 [[INDVARS_IV2]]
; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP14]]
; CHECK-NEXT: store float [[MUL3]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV2]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
br label %for.cond
Expand Down
82 changes: 72 additions & 10 deletions llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
@@ -1,18 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Function Attrs: nounwind
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
;CHECK-LABEL: array_add
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret
; CHECK-LABEL: @array_add(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32* [[C]]
;
entry:
%cmp10 = icmp sgt i32 %size, 0
br i1 %cmp10, label %for.body.preheader, label %for.end
Expand Down
82 changes: 72 additions & 10 deletions llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
@@ -1,18 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -mtriple=arm64-none-linux-gnu -mattr=+neon -S | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Function Attrs: nounwind
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
;CHECK-LABEL: array_add
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret
; CHECK-LABEL: @array_add(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32* [[C]]
;
entry:
%cmp10 = icmp sgt i32 %size, 0
br i1 %cmp10, label %for.body.preheader, label %for.end
Expand Down
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s

; This test currently fails when the LV calculates a maximum safe
Expand All @@ -15,6 +16,47 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"


define void @f1(i32* %A) #0 {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 1024)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <vscale x 4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %for.body

Expand Down

0 comments on commit a113a58

Please sign in to comment.