diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 19f2a363a733b..efcd810203a44 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s @@ -22,23 +23,31 @@ ; Check that the extractvalue operands are actually free in vector code. -; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0 -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0 -; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1 -; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0 -; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2 - -; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph -; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 %index -; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP]], align 4 -; FORCED-NEXT: %index.next = add nuw i32 %index, 2 -; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000 -; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body - define void @test1(ptr %dst, {i64, i64} %sv) { +; FORCED-LABEL: define void @test1( +; FORCED-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) { +; FORCED-NEXT: [[ENTRY:.*:]] +; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; FORCED: [[VECTOR_PH]]: +; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] +; FORCED: [[VECTOR_BODY]]: +; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] +; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4 +; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; FORCED: [[MIDDLE_BLOCK]]: +; FORCED-NEXT: br [[EXIT:label %.*]] +; FORCED: [[SCALAR_PH]]: +; entry: br label 
%loop.body @@ -70,25 +79,31 @@ declare float @powf(float, float) readnone nounwind ; CM: LV: Scalar loop costs: 14. -; FORCED-LABEL: define void @test_getVectorCallCost - -; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0 -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0 -; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1 -; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0 -; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer - -; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph -; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 %index -; FORCED-NEXT: [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2) -; FORCED-NEXT: store <2 x float> [[POW]], ptr [[GEP1]], align 4 -; FORCED-NEXT: %index.next = add nuw i32 %index, 2 -; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000 -; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body - define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { +; FORCED-LABEL: define void @test_getVectorCallCost( +; FORCED-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) { +; FORCED-NEXT: [[ENTRY:.*:]] +; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; FORCED: [[VECTOR_PH]]: +; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] +; FORCED: [[VECTOR_BODY]]: +; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] +; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) +; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 +; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FORCED: [[MIDDLE_BLOCK]]: +; FORCED-NEXT: br [[EXIT:label %.*]] +; FORCED: [[SCALAR_PH]]: +; entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 29795bc9f2982..5e99425c1482c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -1,3 +1,4 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -mtriple aarch64-gnu-linux < %s | FileCheck %s ; extern int arr[8][8]; @@ -16,36 +17,6 @@ ; } ; -; CHECK-LABEL: @foo_i32( -; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer - -; CHECK-LABEL: vector.body: -; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] -; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] -; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] -; CHECK: br label %[[InnerLoop:.+]] - -; CHECK: [[InnerLoop]]: -; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) -; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 -; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] - -; CHECK: [[ForInc]]: -; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) -; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 -; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body - @arr2 = external global [8 x i32], align 16 @arr = external global [8 x [8 x i32]], align 16 @@ -54,6 +25,40 @@ ; Function Attrs: norecurse nounwind uwtable define void @foo_i32(i32 %n) { +; CHECK-LABEL: define void @foo_i32( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> 
[[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: br label %[[FOR_BODY31:.*]] +; CHECK: [[FOR_BODY31]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 8) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; CHECK: [[VECTOR_LATCH]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body @@ -83,35 +88,39 @@ for.end10: ; preds = %for.inc8 ret void } -; CHECK-LABEL: @foo_i64( -; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> poison, i64 %n, i64 0 -; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> poison, <2 x i32> zeroinitializer - -; CHECK-LABEL: vector.body: -; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] -; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> splat (i1 true)) -; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]] -; CHECK: br label %[[InnerLoop:.+]] - -; CHECK: [[InnerLoop]]: -; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> splat (i1 true)) -; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], splat (i64 1) -; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], splat (i64 8) -; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0 -; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] - -; CHECK: [[ForInc]]: -; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2 -; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], splat (i64 2) -; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 -; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body -; Function Attrs: norecurse nounwind uwtable define void @foo_i64(i64 %n) { +; CHECK-LABEL: define void @foo_i64( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> 
[[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> [[TMP0]], i32 4, <2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: br label %[[FOR_BODY31:.*]] +; CHECK: [[FOR_BODY31]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[FOR_BODY31]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> [[VEC_PHI]], <2 x i64> [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[TMP1]], <2 x ptr> [[TMP2]], i32 4, <2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP3]] = add nuw nsw <2 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i64> [[TMP3]], splat (i64 8) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; CHECK: [[VECTOR_LATCH]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 473fabfc9fecc..e214e82e782a6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -1,17 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { -; CHECK-LABEL: @widen_extractvalue( -; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]] -; CHECK: vector.body: +; CHECK-LABEL: define void @widen_extractvalue( +; CHECK-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 1000, [[TMP1]] +; 
CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 +; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %loop.body @@ -38,4 +64,3 @@ attributes #0 = { "target-features"="+sve" } !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} !4 = !{!"llvm.loop.interleave.count", i32 1} !5 = !{!"llvm.loop.vectorize.enable", i1 true} - diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 02d48cbda1aab..7a59884bdfba9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -mtriple x86_64 < %s | FileCheck %s ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -mtriple x86_64 -mattr=+avx < %s | FileCheck %s --check-prefix=AVX ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -mtriple x86_64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX @@ -18,68 +19,78 @@ ; } ; -; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer - -; CHECK-LABEL: vector.body: -; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] -; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] -; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x
i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] -; CHECK: br label %[[InnerLoop:.+]] - -; CHECK: [[InnerLoop]]: -; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) -; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 -; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] - -; CHECK: [[ForInc]]: -; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) -; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 -; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body - -; AVX-LABEL: vector.ph: -; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i64 0 -; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> poison, <8 x i32> zeroinitializer - -; AVX-LABEL: vector.body: -; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] -; AVX: %[[VecInd:.*]] = phi <8 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> %[[VecInd]] -; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> splat (i1 true)) -; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> -; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]] -; AVX: br label %[[InnerLoop:.+]] - -; AVX: [[InnerLoop]]: -; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]] -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> splat (i1 true)) -; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], splat (i64 1) -; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], splat (i64 8) -; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0 -; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] - -; AVX: [[ForInc]]: -; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8 -; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) -; AVX: br i1 true, label %middle.block, label %vector.body - @arr2 = external global [8 x i32], align 16 @arr = external global [8 x [8 x i32]], align 16 ; Function Attrs: norecurse nounwind uwtable define void @foo(i32 %n) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0 +; CHECK-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: br label %[[FOR_BODY31:.*]] +; CHECK: [[FOR_BODY31]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 8) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; CHECK: [[VECTOR_LATCH]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +; AVX-LABEL: define void @foo( +; AVX-SAME: i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX-NEXT: [[ENTRY:.*:]] +; AVX-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; AVX: [[VECTOR_PH]]: +; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i64 0 +; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; AVX-NEXT: br label %[[VECTOR_BODY:.*]] +; AVX: [[VECTOR_BODY]]: +; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] +; AVX-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] +; AVX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> [[VEC_IND]] +; AVX-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> +; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true)) +; AVX-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> +; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP7]], [[BROADCAST_SPLAT]] +; AVX-NEXT: br label %[[FOR_BODY31:.*]] +; AVX: [[FOR_BODY31]]: +; AVX-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> [[VEC_PHI]], <8 x i64> [[VEC_IND]] +; 
AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP2]], <8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true)) +; AVX-NEXT: [[TMP4]] = add nuw nsw <8 x i64> [[VEC_PHI]], splat (i64 1) +; AVX-NEXT: [[TMP5:%.*]] = icmp eq <8 x i64> [[TMP4]], splat (i64 8) +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; AVX-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; AVX: [[VECTOR_LATCH]]: +; AVX-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; AVX-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; AVX-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; AVX: [[MIDDLE_BLOCK]]: +; AVX-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] +; AVX: [[SCALAR_PH]]: +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index c81f48ff62afc..af8ca4a0fae03 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -1,10 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { -; CHECK-LABEL: @test1( -; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr {{.*}}, align 4 -; CHECK: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr {{.*}}, align 4 +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+02) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 @@ -15,6 +24,19 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 2 +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[TMP10]], align 4 +; CHECK-NEXT: store <2 x float> [[TMP9]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 +; CHECK-NEXT: br i1 [[TMP12]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[FOR_END:label %.*]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body @@ -42,16 +64,48 @@ attributes #0 = { nounwind willreturn } %struct.data = type { ptr, ptr } define void @test2(ptr nocapture readonly %d) { -; CHECK-LABEL: @test2( -; CHECK: entry: -; CHECK: [[MASKCOND:%.*]] = icmp eq i64 %maskedptr, 0 -; CHECK: [[MASKCOND4:%.*]] = icmp eq i64 %maskedptr3, 0 -; CHECK: vector.body: -; CHECK: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-LABEL: define void @test2( +; CHECK-SAME: ptr readonly captures(none) [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_DATA:%.*]], ptr [[D]], i64 0, i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B]], align 8 +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D]], align 8 +; CHECK-NEXT: [[PTRINT2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[MASKEDPTR3:%.*]] = and i64 [[PTRINT2]], 31 +; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[MASKEDPTR3]], 0 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[PTRINT2]], [[PTRINT]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK: tail call void @llvm.assume(i1 [[MASKCOND4]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) -; CHECK: for.body: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[FOR_END:label %.*]] +; CHECK: [[SCALAR_PH]]: +; entry: %b = getelementptr inbounds %struct.data, ptr %d, i64 0, i32 1 %0 = load ptr, ptr %b, align 8 @@ -87,10 +141,46 @@ for.end: ; preds = %for.body ; in the vector body. 
define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i32 %n) { ; Check that the vector.body does not contain any assumes. -; CHECK-LABEL: @predicated_assume( -; CHECK: vector.body: -; CHECK-NOT: llvm.assume -; CHECK: for.body: +; CHECK-LABEL: define void @predicated_assume( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP15:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], [[FOR_COND_CLEANUP:label %.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], splat (i64 495616) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[STEP_ADD]], splat (i64 495616) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP2]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[PREDPHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[PREDPHI1]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: %cmp15 = icmp eq i32 %n, 0 br i1 %cmp15, label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 80e7de71870b7..6bc2f389d65ce 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -1,3 +1,6 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 +; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -verify-loop-info -verify-dom-info < %s | FileCheck %s + ; extern int arr[8][8]; ; extern int arr2[8]; ; @@ -13,41 +16,46 @@ ; } ; } ; -; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -verify-loop-info -verify-dom-info < %s | FileCheck %s -; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer - -; CHECK-LABEL: vector.body: -; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] -; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] -; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] -; CHECK: br label %[[InnerLoop:.+]] - -; CHECK: [[InnerLoop]]: -; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) -; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) -; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 -; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] - -; CHECK: [[ForInc]]: -; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) -; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 -; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body @arr2 = external global [8 x i32], align 16 @arr = external global [8 x [8 x i32]], align 16 ; Function Attrs: norecurse nounwind uwtable define void @foo(i32 %n) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] 
to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: br label %[[FOR_BODY31:.*]] +; CHECK: [[FOR_BODY31]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 8) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; CHECK: [[VECTOR_LATCH]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index 358293f123454..82911640aea8c 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -1,14 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt < %s -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { -; CHECK-LABEL: @test1( -; CHECK: vector.body: -; CHECK: [[E1:%.*]] = extractelement {{.+}}, i32 0 -; CHECK-NEXT: [[FCMP1:%.*]] = fcmp ogt float [[E1]] -; CHECK-NEXT: [[E2:%.*]] = extractelement {{.+}}, i32 0 -; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt float [[E2]] +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1600, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 
[[TMP8]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x float>, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x float> [[WIDE_LOAD]], i32 0 +; CHECK-NEXT: [[FCMP1:%.*]] = fcmp ogt float [[TMP10]], 1.000000e+02 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x float> [[WIDE_LOAD1]], i32 0 +; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt float [[TMP12]], 1.000000e+02 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] +; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4 +; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1600, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body @@ -35,16 +73,54 @@ attributes #0 = { nounwind willreturn } %struct.data = type { ptr, ptr } -define void @test2(ptr %a, ptr %b) { -; CHECK-LABEL: @test2( -; CHECK: entry: -; CHECK: [[MASKCOND:%.*]] = icmp eq i64 %ptrint1, 0 -; CHECK: [[MASKCOND4:%.*]] = icmp eq i64 %ptrint2, 0 -; CHECK: vector.body: -; CHECK: tail call void @llvm.assume(i1 [[MASKCOND]]) +define void @test2(ptr %a, ptr noalias %b) { +; CHECK-LABEL: define void @test2( +; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[PTRINT1]], 0 +; CHECK-NEXT: [[PTRINT2:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[PTRINT2]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1600, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK: tail call void @llvm.assume(i1 [[MASKCOND4]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
float, ptr [[TMP10]], i64 [[TMP12]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x float>, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD3]], splat (float 1.000000e+00) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] +; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4 +; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1600, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: %ptrint1 = ptrtoint ptr %a to i64 %maskcond = icmp eq i64 %ptrint1, 0 @@ -75,10 +151,58 @@ for.end: ; preds = %for.body ; in the vector body. define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %n) { ; Check that the vector.body does not contain any assumes. -; CHECK-LABEL: @predicated_assume( -; CHECK: vector.body: -; CHECK-NOT: llvm.assume -; CHECK: for.body: +; CHECK-LABEL: define void @predicated_assume( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 2 x i64> [[VEC_IND]], splat (i64 495616) +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <vscale x 2 x i64> [[STEP_ADD]], splat (i64 495616) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat 
(float 4.200000e+01) +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat (float 4.200000e+01) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP13]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x float>, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <vscale x 2 x float> [[PREDPHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP16:%.*]] = fmul <vscale x 2 x float> [[PREDPHI1]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP19]] +; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP17]], align 4 +; CHECK-NEXT: store <vscale x 2 x float> [[TMP16]], ptr [[TMP20]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: br label %for.body