diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 6bafe5629cc4f..bff0bef13a34b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -259,14 +259,14 @@ define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
 ; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
-; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD2]]
+; CHECK: %[[FCMP1:.*]] = fcmp fast olt <vscale x 8 x float> %[[LOAD1]]
+; CHECK: %[[FCMP2:.*]] = fcmp fast olt <vscale x 8 x float> %[[LOAD2]]
 ; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
 ; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
 ; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
-; CHECK-NEXT: call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %[[SEL]])
+; CHECK: %[[FCMP:.*]] = fcmp fast olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
+; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
+; CHECK-NEXT: call fast float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %[[SEL]])
 entry:
   br label %for.body

@@ -275,7 +275,7 @@ for.body:
   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
   %0 = load float, float* %arrayidx, align 4
-  %cmp.i = fcmp olt float %0, %sum.07
+  %cmp.i = fcmp fast olt float %0, %sum.07
   %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %n
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 6ea9ee6e160f2..2a46b1e8e4b99 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -12,7 +12,7 @@ define void @induction_i7(i64* %dst) #0 {
 ; CHECK-LABEL: @induction_i7(
 ; CHECK: vector.ph:
 ; CHECK: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
-; CHECK: [[TMP5:%.*]] = trunc <vscale x 2 x i8> %4 to <vscale x 2 x i7>
+; CHECK: [[TMP5:%.*]] = trunc <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i7>
 ; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i7> [[TMP5]], zeroinitializer
 ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i7> [[TMP6]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i32 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP7]]
@@ -59,7 +59,7 @@ define void @induction_i3_zext(i64* %dst) #0 {
 ; CHECK-LABEL: @induction_i3_zext(
 ; CHECK: vector.ph:
 ; CHECK: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
-; CHECK: [[TMP5:%.*]] = trunc <vscale x 2 x i8> %4 to <vscale x 2 x i3>
+; CHECK: [[TMP5:%.*]] = trunc <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i3>
 ; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i3> [[TMP5]], zeroinitializer
 ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i3> [[TMP6]], shufflevector (<vscale x 2 x i3> insertelement (<vscale x 2 x i3> poison, i3 1, i32 0), <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i3> zeroinitializer, [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
index ae3ed96555f76..c51abfe308267 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
@@ -1,19 +1,16 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
-; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=4 -force-vector-interleave=1 \
-; RUN:   -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4
+; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-interleave=1 -S 2>&1 | FileCheck %s
 ; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
-; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=8 -force-vector-interleave=1 \
-; RUN:   -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF8
-; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
-; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=4 -force-vector-interleave=1 \
-; RUN:   -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2
+; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-interleave=1 \
+; RUN:   -mcpu=neoverse-n2 -S 2>&1 | FileCheck %s --check-prefix=CHECK-CPU-NEOVERSE-N2

 target triple="aarch64-unknown-linux-gnu"

-; CHECK-VF4: Found an estimated cost of 16 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07
-; CHECK-VF8: Found an estimated cost of 32 for VF vscale x 8 For instruction: %add = fadd float %0, %sum.07
-; CHECK-VF4-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07
+; CHECK: Found an estimated cost of 8 for VF vscale x 2 For instruction: %add = fadd float %0, %sum.07
+; CHECK: Found an estimated cost of 16 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07
+; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd float %0, %sum.07
+; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07

 define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 {
 entry:
@@ -34,9 +31,8 @@ for.end:
 }

-; CHECK-VF4: Found an estimated cost of 16 for VF vscale x 4 For instruction: %add = fadd double %0, %sum.07
-; CHECK-VF8: Found an estimated cost of 32 for VF vscale x 8 For instruction: %add = fadd double %0, %sum.07
-; CHECK-VF4-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd double %0, %sum.07
+; CHECK: Found an estimated cost of 8 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07
+; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07

 define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) #0 {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 481425bf02db8..3f60cf131dedd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -8,38 +8,27 @@
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s

-define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
+define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) #0{
 ; CHECK-LABEL: @vector_reverse_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A2:%.*]] = ptrtoint double* [[A:%.*]] to i64
-; CHECK-NEXT:    [[B1:%.*]] = ptrtoint double* [[B:%.*]] to i64
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
-; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 6
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N]], 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], [[B1]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], [[A2]]
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP3]]
-; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP9:%.*]] = shl i64 [[TMP8]], 3
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 3
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], [[N]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[TMP7]], -8
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[DOTNEG]], 1
@@ -47,11 +36,11 @@ define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[TMP10]] to <vscale x 8 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG7:%.*]] = mul i32 [[TMP14]], -8
-; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG7]], 1
+; CHECK-NEXT:    [[DOTNEG2:%.*]] = mul i32 [[TMP14]], -8
+; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG2]], 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <vscale x 8 x double>*
@@ -60,12 +49,12 @@ define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ], [ [[N]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
@@ -80,7 +69,7 @@ define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]]
 ; CHECK-NEXT:    store double [[ADD]], double* [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
 entry:
   %cmp7 = icmp sgt i64 %N, 0
@@ -131,30 +120,30 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], [[N]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[TMP7]], -8
-; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[DOTNEG]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG7:%.*]] = mul i32 [[TMP14]], -8
-; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG7]], 1
-; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP13]], <vscale x 8 x i64>* [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], [[N]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[TMP13]], -8
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[DOTNEG]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 8 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP17]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[DOTNEG4:%.*]] = mul i32 [[TMP20]], -8
+; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[DOTNEG4]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = sext i32 [[TMP21]] to i64
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP18]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <vscale x 8 x i64>*
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], <vscale x 8 x i64>* [[TMP24]], align 8
+; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP26:%.*]] = shl i64 [[TMP25]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -169,12 +158,12 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 {
 ; CHECK-NEXT:    [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_09]] = add nsw i64 [[I_09_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_09]]
-; CHECK-NEXT:    [[TMP22:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP22]], 1
+; CHECK-NEXT:    [[TMP28:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP28]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_09]]
 ; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]]
 ;
 entry:
   %cmp8 = icmp sgt i64 %N, 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index bb36224db1505..179f81237917c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -46,53 +46,53 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8*, i8** [[START_1:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 2
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP7]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP6]], 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP11]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP13]], i64 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to <vscale x 2 x i8*>*
-; CHECK-NEXT:    store <vscale x 2 x i8*> [[TMP14]], <vscale x 2 x i8*>* [[TMP16]], align 8
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <vscale x 2 x i8*> [[TMP13]], i32 0
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, i8* [[TMP17]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP19]], align 1
-; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i8* [[TMP18]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP20]], <vscale x 2 x i8>* [[TMP21]], align 1
-; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
+; CHECK-NEXT:    store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i8** [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i8* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP_BODY:%.*]]
 ; CHECK:       loop.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ]
 ; CHECK-NEXT:    [[PTR_IV_1:%.*]] = phi i8** [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
 ; CHECK-NEXT:    [[PTR_IV_1_NEXT]] = getelementptr inbounds i8*, i8** [[PTR_IV_1]], i64 1
 ; CHECK-NEXT:    [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, i8* [[PTR_IV_2]], i64 1
 ; CHECK-NEXT:    store i8* [[PTR_IV_2_NEXT]], i8** [[PTR_IV_1]], align 8
@@ -145,19 +145,19 @@ define void @pointer_induction(i8* noalias %start, i64 %N) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[INDEX2_0:%.*]] = add i64 [[INDEX2]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], i64 [[INDEX2_0]]
-; CHECK-NEXT:    [[NEXT_GEP_0:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[NEXT_GEP_0]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP16]], align 1
-; CHECK-NEXT:    [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[NEXT_GEP_0]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP17]], <vscale x 2 x i8>* [[TMP18]], align 1
-; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP22]]
-; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX2]], 0
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP8]], <vscale x 2 x i8>* [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -169,8 +169,8 @@ define void @pointer_induction(i8* noalias %start, i64 %N) {
 ; CHECK-NEXT:    [[PTR_PHI:%.*]] = phi i8* [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDEX_NXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP24:%.*]] = load i8, i8* [[PTR_PHI]], align 1
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP24]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i8, i8* [[PTR_PHI]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP13]], 1
 ; CHECK-NEXT:    store i8 [[ADD]], i8* [[PTR_PHI]], align 1
 ; CHECK-NEXT:    [[PTR_PHI_NEXT]] = getelementptr inbounds i8, i8* [[PTR_PHI]], i64 1
 ; CHECK-NEXT:    [[CMP_I_NOT:%.*]] = icmp eq i8* [[PTR_PHI_NEXT]], [[START]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 658bcce1555cb..c0ea3ad14e878 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -153,7 +153,7 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -188,12 +188,12 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1
 ; CHECK-NEXT:    store i32 [[MUL]], i32* [[D_09]], align 4
@@ -247,7 +247,7 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i32*, i32** [[B:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32*, i32** [[B:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
@@ -277,13 +277,13 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32** [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[VAR3:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[Q:%.*]] = phi i32** [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi i32** [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[VAR0:%.*]] = phi i32 [ [[VAR2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[VAR1:%.*]] = load i32, i32* [[P]], align 8
 ; CHECK-NEXT:    [[VAR2]] = add i32 [[VAR1]], [[VAR0]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index 8a8abf67e5766..94ccfec39e22e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -17,21 +17,14 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"

-define void @vector_reverse_mask_v4i1(double* %a, double* %cond, i64 %N) #0 {
+define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, i64 %N) #0 {
 ; CHECK-LABEL: @vector_reverse_mask_v4i1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
-; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[N]]
-; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr double, double* [[COND:%.*]], i64 [[N]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP4]], [[A]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[COND]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
 ; CHECK-NEXT:    [[IND_END:%.*]] = and i64 [[N]], 7
@@ -40,42 +33,42 @@ define void @vector_reverse_mask_v4i1(double* %a, double* %cond, i64 %N) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[N]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -3
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, double* [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, double* [[TMP10]], i64 -3
-; CHECK-NEXT:    [[REVERSE8:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE8]], <4 x double> poison), !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
-; CHECK-NEXT:    [[REVERSE10:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE10]], <4 x double> poison), !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP16]], <4 x double>* [[TMP18]], i32 8, <4 x i1> [[REVERSE8]]), !alias.scope !3, !noalias !0
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP16]], <4 x double>* [[TMP18]], i32 8, <4 x i1> [[REVERSE3]])
 ; CHECK-NEXT:    [[TMP19:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP17]], <4 x double>* [[TMP19]], i32 8, <4 x i1> [[REVERSE10]]), !alias.scope !3, !noalias !0
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP17]], <4 x double>* [[TMP19]], i32 8, <4 x i1> [[REVERSE5]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ], [ [[N]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
@@ -96,7 +89,7 @@ define void @vector_reverse_mask_v4i1(double* %a, double* %cond, i64 %N) #0 {
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
 entry: