|
39 | 39 | #include "llvm/ADT/StringRef.h"
|
40 | 40 | #include "llvm/Analysis/AliasAnalysis.h"
|
41 | 41 | #include "llvm/Analysis/CmpInstAnalysis.h"
|
| 42 | +#include "llvm/Analysis/HashRecognize.h" |
42 | 43 | #include "llvm/Analysis/LoopInfo.h"
|
43 | 44 | #include "llvm/Analysis/LoopPass.h"
|
44 | 45 | #include "llvm/Analysis/MemoryLocation.h"
|
@@ -143,6 +144,14 @@ static cl::opt<bool, true>
|
143 | 144 | cl::location(DisableLIRP::Wcslen), cl::init(false),
|
144 | 145 | cl::ReallyHidden);
|
145 | 146 |
|
// Storage for the flag declared below; the cl::opt is bound to it through
// cl::location, so parsing the option writes this static member.
// The option is spelled "disable-" DEBUG_TYPE "-hashrecognize", defaults to
// false, and is cl::ReallyHidden. The rest of this file tests
// !DisableLIRP::HashRecognize before attempting the CRC loop optimization.
// Keep cl::location ahead of cl::init in the modifier list.
| 147 | +bool DisableLIRP::HashRecognize; |
| 148 | +static cl::opt<bool, true> |
| 149 | + DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize", |
| 150 | + cl::desc("Proceed with loop idiom recognize pass, " |
| 151 | + "but do not optimize CRC loops."), |
| 152 | + cl::location(DisableLIRP::HashRecognize), |
| 153 | + cl::init(false), cl::ReallyHidden); |
| 154 | + |
146 | 155 | static cl::opt<bool> UseLIRCodeSizeHeurs(
|
147 | 156 | "use-lir-code-size-heurs",
|
148 | 157 | cl::desc("Use loop idiom recognition code size heuristics when compiling "
|
@@ -242,6 +251,7 @@ class LoopIdiomRecognize {
|
242 | 251 | const SCEV *BECount);
|
243 | 252 | bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
|
244 | 253 | bool IsLoopMemset = false);
|
| 254 | + bool optimizeCRCLoop(const PolynomialInfo &Info); |
245 | 255 |
|
246 | 256 | /// @}
|
247 | 257 | /// \name Noncountable Loop Idiom Handling
|
@@ -287,6 +297,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
|
287 | 297 | // but ORE cannot be preserved (see comment before the pass definition).
|
288 | 298 | OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
|
289 | 299 |
|
| 300 | + std::optional<PolynomialInfo> HR; |
| 301 | + |
290 | 302 | LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
|
291 | 303 | AR.MSSA, DL, ORE);
|
292 | 304 | if (!LIR.runOnLoop(&L))
|
@@ -335,7 +347,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
|
335 | 347 | HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
|
336 | 348 | HasMemcpy = TLI->has(LibFunc_memcpy);
|
337 | 349 |
|
338 |
| - if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy) |
| 350 | + if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || |
| 351 | + HasMemcpy || !DisableLIRP::HashRecognize) |
339 | 352 | if (SE->hasLoopInvariantBackedgeTakenCount(L))
|
340 | 353 | return runOnCountableLoop();
|
341 | 354 |
|
@@ -378,6 +391,13 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
|
378 | 391 |
|
379 | 392 | MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
|
380 | 393 | }
|
| 394 | + |
| 395 | + // Optimize a CRC loop if HashRecognize found one, provided we're not |
| 396 | + // optimizing for size. |
| 397 | + if (!DisableLIRP::HashRecognize && !ApplyCodeSizeHeuristics) |
| 398 | + if (auto Res = HashRecognize(*CurLoop, *SE).getResult()) |
| 399 | + optimizeCRCLoop(*Res); |
| 400 | + |
381 | 401 | return MadeChange;
|
382 | 402 | }
|
383 | 403 |
|
@@ -1514,6 +1534,160 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
|
1514 | 1534 | return false;
|
1515 | 1535 | }
|
1516 | 1536 |
|
| 1537 | +bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) { |
| 1538 | + // FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using |
| 1539 | + // carry-less multiplication instructions, which is more efficient than our |
| 1540 | + // Sarwate table-lookup optimization. Hence, until we're able to emit |
| 1541 | + // target-specific instructions for Hexagon, subsuming HexagonLoopIdiom, |
| 1542 | + // disable the optimization for Hexagon. |
| 1543 | + Module &M = *CurLoop->getHeader()->getModule(); |
| 1544 | + Triple TT(M.getTargetTriple()); |
| 1545 | + if (TT.getArch() == Triple::hexagon) |
| 1546 | + return false; |
| 1547 | + |
| 1548 | + // First, create a new GlobalVariable corresponding to the |
| 1549 | + // Sarwate-lookup-table. |
| 1550 | + Type *CRCTy = Info.LHS->getType(); |
| 1551 | + unsigned CRCBW = CRCTy->getIntegerBitWidth(); |
| 1552 | + std::array<Constant *, 256> CRCConstants; |
| 1553 | + transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped), |
| 1554 | + CRCConstants.begin(), |
| 1555 | + [CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); }); |
| 1556 | + Constant *ConstArray = |
| 1557 | + ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants); |
| 1558 | + GlobalVariable *GV = |
| 1559 | + new GlobalVariable(M, ConstArray->getType(), true, |
| 1560 | + GlobalValue::PrivateLinkage, ConstArray, ".crctable"); |
| 1561 | + |
| 1562 | + PHINode *IV = CurLoop->getCanonicalInductionVariable(); |
| 1563 | + SmallVector<PHINode *, 2> Cleanup; |
| 1564 | + |
| 1565 | + // Next, mark all PHIs for removal except IV. |
| 1566 | + { |
| 1567 | + for (PHINode &PN : CurLoop->getHeader()->phis()) { |
| 1568 | + if (&PN == IV) |
| 1569 | + continue; |
| 1570 | + PN.replaceAllUsesWith(PoisonValue::get(PN.getType())); |
| 1571 | + Cleanup.push_back(&PN); |
| 1572 | + } |
| 1573 | + } |
| 1574 | + |
| 1575 | + // Next, fix up the trip count. |
| 1576 | + { |
| 1577 | + unsigned NewBTC = (Info.TripCount / 8) - 1; |
| 1578 | + BasicBlock *LoopBlk = CurLoop->getLoopLatch(); |
| 1579 | + BranchInst *BrInst = cast<BranchInst>(LoopBlk->getTerminator()); |
| 1580 | + CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk |
| 1581 | + ? ICmpInst::Predicate::ICMP_NE |
| 1582 | + : ICmpInst::Predicate::ICMP_EQ; |
| 1583 | + Instruction *ExitCond = CurLoop->getLatchCmpInst(); |
| 1584 | + Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC); |
| 1585 | + IRBuilder<> Builder(ExitCond); |
| 1586 | + Value *NewExitCond = |
| 1587 | + Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond"); |
| 1588 | + ExitCond->replaceAllUsesWith(NewExitCond); |
| 1589 | + deleteDeadInstruction(ExitCond); |
| 1590 | + } |
| 1591 | + |
| 1592 | + // Finally, fill the loop with the Sarwate-table-lookup logic, and replace all |
| 1593 | + // uses of ComputedValue. |
| 1594 | + // |
| 1595 | + // Little-endian: |
| 1596 | + // crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)] |
| 1597 | + // Big-Endian: |
| 1598 | + // crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)] |
| 1599 | + { |
| 1600 | + auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) { |
| 1601 | + Type *OpTy = Op->getType(); |
| 1602 | + unsigned OpBW = OpTy->getIntegerBitWidth(); |
| 1603 | + return OpBW > 8 |
| 1604 | + ? Builder.CreateAnd(Op, ConstantInt::get(OpTy, 0XFF), Name) |
| 1605 | + : Op; |
| 1606 | + }; |
| 1607 | + auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op, |
| 1608 | + const Twine &Name) { |
| 1609 | + Type *OpTy = Op->getType(); |
| 1610 | + |
| 1611 | + // When the bitwidth of the CRC mismatches the Op's bitwidth, we need to |
| 1612 | + // use the CRC's bitwidth as the reference for shifting right. |
| 1613 | + return LoByte(Builder, |
| 1614 | + CRCBW > 8 ? Builder.CreateLShr( |
| 1615 | + Op, ConstantInt::get(OpTy, CRCBW - 8), Name) |
| 1616 | + : Op, |
| 1617 | + Name + ".lo.byte"); |
| 1618 | + }; |
| 1619 | + |
| 1620 | + IRBuilder<> Builder(CurLoop->getHeader(), |
| 1621 | + CurLoop->getHeader()->getFirstNonPHIIt()); |
| 1622 | + |
| 1623 | + // Create the CRC PHI, and initialize its incoming value to the initial |
| 1624 | + // value of CRC. |
| 1625 | + PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc"); |
| 1626 | + CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader()); |
| 1627 | + |
| 1628 | + // CRC is now an evolving variable, initialized to the PHI. |
| 1629 | + Value *CRC = CRCPhi; |
| 1630 | + |
| 1631 | + // TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte |
| 1632 | + // of LHSAux), if LHSAux is non-nullptr. |
| 1633 | + Value *Indexer = CRC; |
| 1634 | + if (Value *Data = Info.LHSAux) { |
| 1635 | + Type *DataTy = Data->getType(); |
| 1636 | + |
| 1637 | + // To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we |
| 1638 | + // shift right by that amount, and take the lo-byte (in the little-endian |
| 1639 | + // case), or shift left by that amount, and take the hi-idx (in the |
| 1640 | + // big-endian case). |
| 1641 | + Value *IVBits = Builder.CreateZExtOrTrunc( |
| 1642 | + Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer"); |
| 1643 | + Value *DataIndexer = |
| 1644 | + Info.ByteOrderSwapped |
| 1645 | + ? Builder.CreateShl(Data, IVBits, "data.indexer") |
| 1646 | + : Builder.CreateLShr(Data, IVBits, "data.indexer"); |
| 1647 | + Indexer = Builder.CreateXor( |
| 1648 | + DataIndexer, |
| 1649 | + Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"), |
| 1650 | + "crc.data.indexer"); |
| 1651 | + } |
| 1652 | + |
| 1653 | + Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi") |
| 1654 | + : LoByte(Builder, Indexer, "indexer.lo"); |
| 1655 | + |
| 1656 | + // Always index into a GEP using the index type. |
| 1657 | + Indexer = Builder.CreateZExt( |
| 1658 | + Indexer, SE->getDataLayout().getIndexType(GV->getType()), |
| 1659 | + "indexer.ext"); |
| 1660 | + |
| 1661 | + // CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC]. |
| 1662 | + Value *CRCTableGEP = |
| 1663 | + Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd"); |
| 1664 | + Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld"); |
| 1665 | + |
| 1666 | + // CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of |
| 1667 | + // CRC-8. |
| 1668 | + Value *CRCNext = CRCTableLd; |
| 1669 | + if (CRCBW > 8) { |
| 1670 | + Value *CRCShift = Info.ByteOrderSwapped |
| 1671 | + ? Builder.CreateShl(CRC, 8, "crc.be.shift") |
| 1672 | + : Builder.CreateLShr(CRC, 8, "crc.le.shift"); |
| 1673 | + CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next"); |
| 1674 | + } |
| 1675 | + |
| 1676 | + // Connect the back-edge for the loop, and RAUW the ComputedValue. |
| 1677 | + CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch()); |
| 1678 | + Info.ComputedValue->replaceUsesOutsideBlock(CRCNext, |
| 1679 | + CurLoop->getLoopLatch()); |
| 1680 | + } |
| 1681 | + |
| 1682 | + // Cleanup. |
| 1683 | + { |
| 1684 | + for (PHINode *PN : Cleanup) |
| 1685 | + RecursivelyDeleteDeadPHINode(PN); |
| 1686 | + SE->forgetLoop(CurLoop); |
| 1687 | + } |
| 1688 | + return true; |
| 1689 | +} |
| 1690 | + |
1517 | 1691 | bool LoopIdiomRecognize::runOnNoncountableLoop() {
|
1518 | 1692 | LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
|
1519 | 1693 | << CurLoop->getHeader()->getParent()->getName()
|
|
0 commit comments