Skip to content

Commit

Permalink
[LV] Restore metadata to disable runtime unrolling for epilogue loop.
Browse files Browse the repository at this point in the history
After d4a8fc3 LV stopped adding metadata to disable runtime
unrolling to the vectorized epilogue loop. This was missed because
278aa65 removed the relevant test coverage.

This patch fixes that by adding the relevant metadata after
vector loop generation.
  • Loading branch information
fhahn committed Jan 16, 2022
1 parent be9eafc commit 070d103
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 83 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -8003,6 +8003,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
}
// Disable runtime unrolling when vectorizing the epilogue loop.
if (CanonicalIVStartValue)
AddRuntimeUnrollDisableMetaData(L);

// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
Expand Down Expand Up @@ -8273,7 +8276,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
createInductionResumeValues(Lp, {VecEpilogueIterationCountCheck,
EPI.VectorTripCount} /* AdditionalBypass */);

AddRuntimeUnrollDisableMetaData(Lp);
return {completeLoopSkeleton(Lp, OrigLoopID), EPResumeVal};
}

Expand Down
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -S | FileCheck %s --check-prefix VF-TWO-CHECK
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S | FileCheck %s --check-prefix VF-FOUR-CHECK

Expand Down Expand Up @@ -194,7 +193,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 48
; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV:![0-9]+]]
; VF-TWO-CHECK: middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
Expand Down Expand Up @@ -225,7 +224,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP140]], <2 x float>* [[TMP143]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX27]], 2
; VF-TWO-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC26]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV:![0-9]+]]
; VF-TWO-CHECK: vec.epilog.middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N29:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N29]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
Expand Down Expand Up @@ -914,7 +913,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP115]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV_CM:![0-9]+]]
; VF-FOUR-CHECK: middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
Expand Down Expand Up @@ -950,7 +949,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP126]], <4 x float>* [[TMP129]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX18]], 4
; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV_CM:![0-9]+]]
; VF-FOUR-CHECK: vec.epilog.middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N22]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
Expand All @@ -972,16 +971,14 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:![0-9]+]]
; VF-FOUR-CHECK: for.end.loopexit.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; VF-FOUR-CHECK: for.end.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
; VF-FOUR-CHECK: for.end:
; VF-FOUR-CHECK-NEXT: ret i32 0
;


entry:
%cmp1 = icmp sgt i32 %n, 1
br i1 %cmp1, label %for.body.preheader, label %for.end
Expand Down Expand Up @@ -1014,4 +1011,14 @@ for.end: ; preds = %for.end.loopexit, %
ret i32 0
}

; VF-TWO-CHECK-DAG: [[LOOPID_MV]] = distinct !{[[LOOPID_MV]], [[LOOPID_DISABLE_VECT:!.*]]}
; VF-TWO-CHECK-DAG: [[LOOPID_EV]] = distinct !{[[LOOPID_EV]], [[LOOPID_DISABLE_VECT]], [[LOOPID_DISABLE_UNROLL:!.*]]}
; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_VECT]] = [[DISABLE_VECT_STR:!{!"llvm.loop.isvectorized".*}.*]]
; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_UNROLL]] = [[DISABLE_UNROLL_STR:!{!"llvm.loop.unroll.runtime.disable"}.*]]
;
; VF-FOUR-CHECK-DAG: [[LOOPID_MV_CM]] = distinct !{[[LOOPID_MV_CM]], [[LOOPID_DISABLE_VECT_CM:!.*]]}
; VF-FOUR-CHECK-DAG: [[LOOPID_EV_CM]] = distinct !{[[LOOPID_EV_CM]], [[LOOPID_DISABLE_VECT_CM]], [[LOOPID_DISABLE_UNROLL_CM:!.*]]}
; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_VECT_CM]] = [[DISABLE_VECT_STR_CM:!{!"llvm.loop.isvectorized".*}.*]]
; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_UNROLL_CM]] = [[DISABLE_UNROLL_STR_CM:!{!"llvm.loop.unroll.runtime.disable"}.*]]
;
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-spe" "unsafe-fp-math"="true" "use-soft-float"="false" }
Expand Up @@ -186,7 +186,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
; CHECK: latch:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
Expand Down Expand Up @@ -257,17 +257,17 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !16, !noalias !19
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !17, !noalias !20
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !16, !noalias !19
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !22
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !23, !noalias !22
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !23
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
Expand Down Expand Up @@ -300,7 +300,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP9]])
; CHECK-NEXT: [[INDEX_NEXT35]] = add nuw i64 [[INDEX26]], 8
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC24]]
; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[SMAX22]], [[N_VEC24]]
; CHECK-NEXT: br i1 [[CMP_N25]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
Expand All @@ -322,7 +322,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK: latch:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
Expand Down

0 comments on commit 070d103

Please sign in to comment.