Skip to content

Commit b7e922a

Browse files
authored
[VPlan] Convert BuildVector with all-equal values to Broadcast. (#165826)
Fold BuildVector where all operands are equal to Broadcast of the first operand. This will subsequently make it easier to remove additional buildvectors/broadcasts, e.g. via #165506. PR: #165826
1 parent 4c21d0c commit b7e922a

File tree

5 files changed

+21
-17
lines changed

5 files changed

+21
-17
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
12811281
return;
12821282
}
12831283

1284+
if (match(Def, m_BuildVector()) && all_equal(R.operands())) {
1285+
Def->replaceAllUsesWith(
1286+
Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
1287+
return;
1288+
}
1289+
12841290
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
12851291
if (Phi->getNumOperands() == 1)
12861292
Phi->replaceAllUsesWith(Phi->getOperand(0));

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
365365
; CHECK: [[VECTOR_BODY]]:
366366
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
367367
; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16
368-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
369-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1
368+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0
369+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
370370
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]
371371
; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8
372372
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2

llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
1010
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
13-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
14-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
13+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
14+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
1515
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1616
; CHECK: [[VECTOR_BODY]]:
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
6464
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
6565
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
6666
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
67-
; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
68-
; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
69-
; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1
67+
; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]]
68+
; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
69+
; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
7070
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
7171
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
7272
; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
7979
; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
8080
; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
8181
; TFCOMMON: pred.store.if1:
82-
; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
82+
; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
8383
; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
8484
; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
8585
; TFCOMMON: pred.store.continue2:
@@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
105105
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
106106
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
107107
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
108-
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
109-
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
110-
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1
108+
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]]
109+
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
110+
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
111111
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
112112
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
113113
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
120120
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
121121
; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
122122
; TFA_INTERLEAVE: pred.store.if3:
123-
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
123+
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
124124
; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
125125
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
126126
; TFA_INTERLEAVE: pred.store.continue4:
@@ -134,7 +134,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
134134
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
135135
; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
136136
; TFA_INTERLEAVE: pred.store.if7:
137-
; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
137+
; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
138138
; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
139139
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
140140
; TFA_INTERLEAVE: pred.store.continue8:

llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
199199
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7
200200
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4
201201
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
202-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0
203-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1
204-
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2
205-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3
202+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0
203+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
206204
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
207205
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
208206
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]

0 commit comments

Comments
 (0)