@@ -170,10 +170,10 @@ class LoopIdiomVectorize {
170
170
bool recognizeFindFirstByte ();
171
171
172
172
Value *expandFindFirstByte (IRBuilder<> &Builder, DomTreeUpdater &DTU,
173
- unsigned VF, Type *CharTy, BasicBlock *ExitSucc ,
174
- BasicBlock *ExitFail, Value *SearchStart ,
175
- Value *SearchEnd , Value *NeedleStart ,
176
- Value *NeedleEnd);
173
+ unsigned VF, Type *CharTy, Value *IndPhi ,
174
+ BasicBlock *ExitSucc, BasicBlock *ExitFail ,
175
+ Value *SearchStart , Value *SearchEnd ,
176
+ Value *NeedleStart, Value * NeedleEnd);
177
177
178
178
void transformFindFirstByte (PHINode *IndPhi, unsigned VF, Type *CharTy,
179
179
BasicBlock *ExitSucc, BasicBlock *ExitFail,
@@ -242,6 +242,37 @@ bool LoopIdiomVectorize::run(Loop *L) {
242
242
return false ;
243
243
}
244
244
245
+ static void fixSuccessorPhis (Loop *L, Value *ScalarRes, Value *VectorRes,
246
+ BasicBlock *SuccBB, BasicBlock *IncBB) {
247
+ for (PHINode &PN : SuccBB->phis ()) {
248
+ // A PHI in SuccBB that has ScalarRes among its incoming values is one that
249
+ // collects the result of the transformation.
250
+ bool ResPhi = false ;
251
+ for (Value *Op : PN.incoming_values ())
252
+ if (Op == ScalarRes) {
253
+ ResPhi = true ;
254
+ break ;
255
+ }
256
+
257
+ // A PHI that collects the result of the transformation receives VectorRes
258
+ // as its new incoming value from IncBB.
259
+ if (ResPhi)
260
+ PN.addIncoming (VectorRes, IncBB);
261
+ else {
262
+ // There should be no other outside uses of other values in the
263
+ // original loop. Any incoming values should either:
264
+ // 1. Be for blocks outside the loop, which aren't interesting. Or ...
265
+ // 2. Be from blocks in the loop with values defined outside the loop.
266
+ // For these, reuse the first in-loop block's incoming value for IncBB.
267
+ for (BasicBlock *BB : PN.blocks ())
268
+ if (L->contains (BB)) {
269
+ PN.addIncoming (PN.getIncomingValueForBlock (BB), IncBB);
270
+ break ;
271
+ }
272
+ }
273
+ }
274
+ }
275
+
245
276
bool LoopIdiomVectorize::recognizeByteCompare () {
246
277
// Currently the transformation only works on scalable vector types, although
247
278
// there is no fundamental reason why it cannot be made to work for fixed
@@ -935,42 +966,10 @@ void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,
935
966
DTU.applyUpdates ({{DominatorTree::Insert, CmpBB, FoundBB}});
936
967
}
937
968
938
- auto fixSuccessorPhis = [&](BasicBlock *SuccBB) {
939
- for (PHINode &PN : SuccBB->phis ()) {
940
- // At this point we've already replaced all uses of the result from the
941
- // loop with ByteCmp. Look through the incoming values to find ByteCmp,
942
- // meaning this is a Phi collecting the results of the byte compare.
943
- bool ResPhi = false ;
944
- for (Value *Op : PN.incoming_values ())
945
- if (Op == ByteCmpRes) {
946
- ResPhi = true ;
947
- break ;
948
- }
949
-
950
- // Any PHI that depended upon the result of the byte compare needs a new
951
- // incoming value from CmpBB. This is because the original loop will get
952
- // deleted.
953
- if (ResPhi)
954
- PN.addIncoming (ByteCmpRes, CmpBB);
955
- else {
956
- // There should be no other outside uses of other values in the
957
- // original loop. Any incoming values should either:
958
- // 1. Be for blocks outside the loop, which aren't interesting. Or ..
959
- // 2. These are from blocks in the loop with values defined outside
960
- // the loop. We should a similar incoming value from CmpBB.
961
- for (BasicBlock *BB : PN.blocks ())
962
- if (CurLoop->contains (BB)) {
963
- PN.addIncoming (PN.getIncomingValueForBlock (BB), CmpBB);
964
- break ;
965
- }
966
- }
967
- }
968
- };
969
-
970
969
// Ensure all Phis in the successors of CmpBB have an incoming value from it.
971
- fixSuccessorPhis (EndBB);
970
+ fixSuccessorPhis (CurLoop, ByteCmpRes, ByteCmpRes, EndBB, CmpBB );
972
971
if (EndBB != FoundBB)
973
- fixSuccessorPhis (FoundBB);
972
+ fixSuccessorPhis (CurLoop, ByteCmpRes, ByteCmpRes, FoundBB, CmpBB );
974
973
975
974
// The new CmpBB block isn't part of the loop, but will need to be added to
976
975
// the outer loop if there is one.
@@ -1168,8 +1167,9 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
1168
1167
1169
1168
Value *LoopIdiomVectorize::expandFindFirstByte (
1170
1169
IRBuilder<> &Builder, DomTreeUpdater &DTU, unsigned VF, Type *CharTy,
1171
- BasicBlock *ExitSucc, BasicBlock *ExitFail, Value *SearchStart,
1172
- Value *SearchEnd, Value *NeedleStart, Value *NeedleEnd) {
1170
+ Value *IndPhi, BasicBlock *ExitSucc, BasicBlock *ExitFail,
1171
+ Value *SearchStart, Value *SearchEnd, Value *NeedleStart,
1172
+ Value *NeedleEnd) {
1173
1173
// Set up some types and constants that we intend to reuse.
1174
1174
auto *PtrTy = Builder.getPtrTy ();
1175
1175
auto *I64Ty = Builder.getInt64Ty ();
@@ -1369,6 +1369,12 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
1369
1369
MatchLCSSA->addIncoming (Search, BB2);
1370
1370
MatchPredLCSSA->addIncoming (MatchPred, BB2);
1371
1371
1372
+ // Ensure all Phis in the successors of BB3/BB5 have an incoming value from
1373
+ // them.
1374
+ fixSuccessorPhis (CurLoop, IndPhi, MatchVal, ExitSucc, BB3);
1375
+ if (ExitSucc != ExitFail)
1376
+ fixSuccessorPhis (CurLoop, IndPhi, MatchVal, ExitFail, BB5);
1377
+
1372
1378
if (VerifyLoops) {
1373
1379
OuterLoop->verifyLoop ();
1374
1380
InnerLoop->verifyLoop ();
@@ -1390,21 +1396,12 @@ void LoopIdiomVectorize::transformFindFirstByte(
1390
1396
DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Lazy);
1391
1397
Builder.SetCurrentDebugLocation (PHBranch->getDebugLoc ());
1392
1398
1393
- Value *MatchVal =
1394
- expandFindFirstByte (Builder, DTU, VF, CharTy, ExitSucc, ExitFail,
1395
- SearchStart, SearchEnd, NeedleStart, NeedleEnd);
1399
+ expandFindFirstByte (Builder, DTU, VF, CharTy, IndPhi, ExitSucc, ExitFail,
1400
+ SearchStart, SearchEnd, NeedleStart, NeedleEnd);
1396
1401
1397
1402
assert (PHBranch->isUnconditional () &&
1398
1403
" Expected preheader to terminate with an unconditional branch." );
1399
1404
1400
- // Add new incoming values with the result of the transformation to PHINodes
1401
- // of ExitSucc that use IndPhi.
1402
- for (auto *U : llvm::make_early_inc_range (IndPhi->users ())) {
1403
- auto *PN = dyn_cast<PHINode>(U);
1404
- if (PN && PN->getParent () == ExitSucc)
1405
- PN->addIncoming (MatchVal, cast<Instruction>(MatchVal)->getParent ());
1406
- }
1407
-
1408
1405
if (VerifyLoops && CurLoop->getParentLoop ()) {
1409
1406
CurLoop->getParentLoop ()->verifyLoop ();
1410
1407
if (!CurLoop->getParentLoop ()->isRecursivelyLCSSAForm (*DT, *LI))
0 commit comments