diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll index 761c0b62e98a3..5742b3ad45749 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll @@ -24,20 +24,21 @@ target triple = "aarch64-linux-gnu" ; DEBUG-EPILOG-PREFER-SCALABLE: Create Skeleton for epilogue vectorized loop (first pass) ; DEBUG-EPILOG-PREFER-SCALABLE: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1 -define void @main_vf_vscale_x_16(ptr %A) #0 { +define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 { ; CHECK-LABEL: @main_vf_vscale_x_16( ; CHECK-NEXT: iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -51,26 +52,29 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]] +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]] ; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; @@ -78,18 +82,18 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: iter.check: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.main.loop.iter.check: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.ph: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.body: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -103,20 +107,20 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-EPILOG-PREFER-SCALABLE: middle.block: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.iter.check: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 3 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP12]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP14]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.vector.body: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -124,9 +128,9 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store splat (i8 1), ptr [[TMP15]], align 1 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP14]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.middle.block: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 1024, [[N_VEC3]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.scalar.ph: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -141,7 +145,7 @@ for.body: %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv store i8 1, ptr %arrayidx, align 1 %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp ne i64 %iv.next, 1024 + %exitcond = icmp ne i64 %iv.next, %n br i1 %exitcond, label %for.body, label %exit exit: