-
Notifications
You must be signed in to change notification settings - Fork 15k
[VPlan] Convert BuildVector with all-equal values to Broadcast. #165826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fold BuildVector where all operands are equal to Broadcast of the first operand. This will subsequently make it easier to remove additional buildvectors/broadcasts, e.g. via llvm#165506.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesFold BuildVector where all operands are equal to Broadcast of the first operand. This will subsequently make it easier to remove additional buildvectors/broadcasts, e.g. via Full diff: https://github.com/llvm/llvm-project/pull/165826.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 986c801abf684..d12588e2861d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1282,6 +1282,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ if (match(Def, m_BuildVector()) && all_equal(R.operands())) {
+ Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index bfee39eac0ae2..068f82c7db670 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
index ea0148952f51b..0a9494e4c7ade 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
@@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 157b78704234a..35589573eed76 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
-; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1
+; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]]
+; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
+; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.if1:
-; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
+; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.continue2:
@@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
-; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
-; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]]
+; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
+; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; TFA_INTERLEAVE: pred.store.if3:
-; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
; TFA_INTERLEAVE: pred.store.continue4:
@@ -134,7 +134,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.if7:
-; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index 03087bb883464..4590dfc5326b5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…cast. (#165826) Fold BuildVector where all operands are equal to Broadcast of the first operand. This will subsequently make it easier to remove additional buildvectors/broadcasts, e.g. via llvm/llvm-project#165506. PR: llvm/llvm-project#165826
Fold BuildVector where all operands are equal to Broadcast of the first operand. This will subsequently make it easier to remove additional buildvectors/broadcasts, e.g. via
#165506.