| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| PCH_CXX_SOURCE = pch.h | ||
| CXX_SOURCES = main.cpp | ||
|
|
||
| include Makefile.rules |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| """ | ||
| Tests that we correctly track AST layout info | ||
| (specifically alignment) when moving AST nodes | ||
| between ClangASTImporter instances (in this case, | ||
| from pch to executable to expression AST). | ||
| """ | ||
|
|
||
| import lldb | ||
| import os | ||
| from lldbsuite.test.decorators import * | ||
| from lldbsuite.test.lldbtest import * | ||
| from lldbsuite.test import lldbutil | ||
|
|
||
|
|
||
class TestPchAlignment(TestBase):
    """Checks the values of an over-aligned struct that is declared in a PCH,
    both through `frame variable` and through expression evaluation."""

    def _build_and_stop(self):
        """Build the test program and run to the breakpoint line in main.cpp."""
        self.build()
        lldbutil.run_to_source_breakpoint(
            self, "return data", lldb.SBFileSpec("main.cpp")
        )

    @add_test_categories(["gmodules"])
    def test_expr(self):
        """Evaluate `data` as an expression and check type, layout and values."""
        self._build_and_stop()

        self.expect_expr(
            "data",
            result_type="MatrixData",
            result_children=[
                ValueCheck(
                    name="section",
                    children=[
                        ValueCheck(
                            name="origin",
                            children=[
                                ValueCheck(name="row", value="1"),
                                ValueCheck(name="col", value="2"),
                            ],
                        ),
                        ValueCheck(
                            name="size",
                            children=[
                                ValueCheck(name="row", value="3"),
                                ValueCheck(name="col", value="4"),
                            ],
                        ),
                    ],
                ),
                ValueCheck(name="stride", value="5"),
            ],
        )

    @add_test_categories(["gmodules"])
    def test_frame_var(self):
        """Print `data` with `frame variable` and check every stored value."""
        self._build_and_stop()

        self.expect(
            "frame variable data",
            substrs=["row = 1", "col = 2", "row = 3", "col = 4", "stride = 5"],
        )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| int main(int argc, const char *argv[]) { | ||
| struct MatrixData data = {0}; | ||
| data.section.origin.row = 1; | ||
| data.section.origin.col = 2; | ||
| data.section.size.row = 3; | ||
| data.section.size.col = 4; | ||
| data.stride = 5; | ||
|
|
||
| return data.section.size.row; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| #ifndef PCH_H_IN | ||
| #define PCH_H_IN | ||
|
|
||
// Alignment, in bytes, requested for every struct declared in this header.
static const int kAlignment = 64;

// A pair of unsigned indices, over-aligned to kAlignment bytes.
struct [[gnu::aligned(kAlignment)]] RowCol {
  unsigned row;
  unsigned col;
};

// Two RowCol members (origin and size); each member keeps its own
// kAlignment-byte alignment inside the enclosing struct.
struct [[gnu::aligned(kAlignment)]] Submatrix {
  RowCol origin;
  RowCol size;
};

// A Submatrix followed by a single unsigned stride value.
struct [[gnu::aligned(kAlignment)]] MatrixData {
  Submatrix section;
  unsigned stride;
};
|
|
||
| #endif // PCH_H_IN |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,216 @@ | ||
| //===----- HexagonLoopAlign.cpp - Generate loop alignment directives -----===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // Inspect a basic block and, if it is a single-basic-block loop with a small | ||
| // number of instructions, set its alignment to 32 bytes (log2 value 5). | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #define DEBUG_TYPE "hexagon-loop-align" | ||
|
|
||
| #include "HexagonTargetMachine.h" | ||
| #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" | ||
| #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" | ||
| #include "llvm/CodeGen/SchedulerRegistry.h" | ||
| #include "llvm/Support/Debug.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| static cl::opt<bool> | ||
| DisableLoopAlign("disable-hexagon-loop-align", cl::Hidden, | ||
| cl::desc("Disable Hexagon loop alignment pass")); | ||
|
|
||
| static cl::opt<uint32_t> HVXLoopAlignLimitUB( | ||
| "hexagon-hvx-loop-align-limit-ub", cl::Hidden, cl::init(16), | ||
| cl::desc("Set hexagon hvx loop upper bound align limit")); | ||
|
|
||
| static cl::opt<uint32_t> TinyLoopAlignLimitUB( | ||
| "hexagon-tiny-loop-align-limit-ub", cl::Hidden, cl::init(16), | ||
| cl::desc("Set hexagon tiny-core loop upper bound align limit")); | ||
|
|
||
| static cl::opt<uint32_t> | ||
| LoopAlignLimitUB("hexagon-loop-align-limit-ub", cl::Hidden, cl::init(8), | ||
| cl::desc("Set hexagon loop upper bound align limit")); | ||
|
|
||
| static cl::opt<uint32_t> | ||
| LoopAlignLimitLB("hexagon-loop-align-limit-lb", cl::Hidden, cl::init(4), | ||
| cl::desc("Set hexagon loop lower bound align limit")); | ||
|
|
||
| static cl::opt<uint32_t> | ||
| LoopBndlAlignLimit("hexagon-loop-bundle-align-limit", cl::Hidden, | ||
| cl::init(4), | ||
| cl::desc("Set hexagon loop align bundle limit")); | ||
|
|
||
| static cl::opt<uint32_t> TinyLoopBndlAlignLimit( | ||
| "hexagon-tiny-loop-bundle-align-limit", cl::Hidden, cl::init(8), | ||
| cl::desc("Set hexagon tiny-core loop align bundle limit")); | ||
|
|
||
| static cl::opt<uint32_t> | ||
| LoopEdgeThreshold("hexagon-loop-edge-threshold", cl::Hidden, cl::init(7500), | ||
| cl::desc("Set hexagon loop align edge theshold")); | ||
|
|
||
| namespace llvm { | ||
| FunctionPass *createHexagonLoopAlign(); | ||
| void initializeHexagonLoopAlignPass(PassRegistry &); | ||
| } // namespace llvm | ||
|
|
||
| namespace { | ||
|
|
||
| class HexagonLoopAlign : public MachineFunctionPass { | ||
| const HexagonSubtarget *HST = nullptr; | ||
| const TargetMachine *HTM = nullptr; | ||
| const HexagonInstrInfo *HII = nullptr; | ||
|
|
||
| public: | ||
| static char ID; | ||
| HexagonLoopAlign() : MachineFunctionPass(ID) { | ||
| initializeHexagonLoopAlignPass(*PassRegistry::getPassRegistry()); | ||
| } | ||
| bool shouldBalignLoop(MachineBasicBlock &BB, bool AboveThres); | ||
| bool isSingleLoop(MachineBasicBlock &MBB); | ||
| bool attemptToBalignSmallLoop(MachineFunction &MF, MachineBasicBlock &MBB); | ||
|
|
||
| void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
| AU.addRequired<MachineBranchProbabilityInfo>(); | ||
| AU.addRequired<MachineBlockFrequencyInfo>(); | ||
| MachineFunctionPass::getAnalysisUsage(AU); | ||
| } | ||
|
|
||
| StringRef getPassName() const override { return "Hexagon LoopAlign pass"; } | ||
| bool runOnMachineFunction(MachineFunction &MF) override; | ||
| }; | ||
|
|
||
| char HexagonLoopAlign::ID = 0; | ||
|
|
||
| bool HexagonLoopAlign::shouldBalignLoop(MachineBasicBlock &BB, | ||
| bool AboveThres) { | ||
| bool isVec = false; | ||
| unsigned InstCnt = 0; | ||
| unsigned BndlCnt = 0; | ||
|
|
||
| for (MachineBasicBlock::instr_iterator II = BB.instr_begin(), | ||
| IE = BB.instr_end(); | ||
| II != IE; ++II) { | ||
|
|
||
| // End if the instruction is endloop. | ||
| if (HII->isEndLoopN(II->getOpcode())) | ||
| break; | ||
| // Count the number of bundles. | ||
| if (II->isBundle()) { | ||
| BndlCnt++; | ||
| continue; | ||
| } | ||
| // Skip over debug instructions. | ||
| if (II->isDebugInstr()) | ||
| continue; | ||
| // Check if there are any HVX instructions in loop. | ||
| isVec |= HII->isHVXVec(*II); | ||
| // Count the number of instructions. | ||
| InstCnt++; | ||
| } | ||
|
|
||
| LLVM_DEBUG({ | ||
| dbgs() << "Bundle Count : " << BndlCnt << "\n"; | ||
| dbgs() << "Instruction Count : " << InstCnt << "\n"; | ||
| }); | ||
|
|
||
| unsigned LimitUB = 0; | ||
| unsigned LimitBndl = LoopBndlAlignLimit; | ||
| // The conditions in the order of priority. | ||
| if (HST->isTinyCore()) { | ||
| LimitUB = TinyLoopAlignLimitUB; | ||
| LimitBndl = TinyLoopBndlAlignLimit; | ||
| } else if (isVec) | ||
| LimitUB = HVXLoopAlignLimitUB; | ||
| else if (AboveThres) | ||
| LimitUB = LoopAlignLimitUB; | ||
|
|
||
| // if the upper bound is not set to a value, implies we didn't meet | ||
| // the criteria. | ||
| if (LimitUB == 0) | ||
| return false; | ||
|
|
||
| return InstCnt >= LoopAlignLimitLB && InstCnt <= LimitUB && | ||
| BndlCnt <= LimitBndl; | ||
| } | ||
|
|
||
| bool HexagonLoopAlign::isSingleLoop(MachineBasicBlock &MBB) { | ||
| int Succs = MBB.succ_size(); | ||
| return (MBB.isSuccessor(&MBB) && (Succs == 2)); | ||
| } | ||
|
|
||
| bool HexagonLoopAlign::attemptToBalignSmallLoop(MachineFunction &MF, | ||
| MachineBasicBlock &MBB) { | ||
| if (!isSingleLoop(MBB)) | ||
| return false; | ||
|
|
||
| const MachineBranchProbabilityInfo *MBPI = | ||
| &getAnalysis<MachineBranchProbabilityInfo>(); | ||
| const MachineBlockFrequencyInfo *MBFI = | ||
| &getAnalysis<MachineBlockFrequencyInfo>(); | ||
|
|
||
| // Compute frequency of back edge, | ||
| BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); | ||
| BranchProbability BrProb = MBPI->getEdgeProbability(&MBB, &MBB); | ||
| BlockFrequency EdgeFreq = BlockFreq * BrProb; | ||
| LLVM_DEBUG({ | ||
| dbgs() << "Loop Align Pass:\n"; | ||
| dbgs() << "\tedge with freq(" << EdgeFreq.getFrequency() << ")\n"; | ||
| }); | ||
|
|
||
| bool AboveThres = EdgeFreq.getFrequency() > LoopEdgeThreshold; | ||
| if (shouldBalignLoop(MBB, AboveThres)) { | ||
| // We found a loop, change its alignment to be 32 (5). | ||
| MBB.setAlignment(llvm::Align(1 << 5)); | ||
| return true; | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| // Inspect each basic block, and if its a single BB loop, see if it | ||
| // meets the criteria for increasing alignment to 32. | ||
|
|
||
| bool HexagonLoopAlign::runOnMachineFunction(MachineFunction &MF) { | ||
|
|
||
| HST = &MF.getSubtarget<HexagonSubtarget>(); | ||
| HII = HST->getInstrInfo(); | ||
| HTM = &MF.getTarget(); | ||
|
|
||
| if (skipFunction(MF.getFunction())) | ||
| return false; | ||
| if (DisableLoopAlign) | ||
| return false; | ||
|
|
||
| // This optimization is performed at | ||
| // i) -O2 and above, and when the loop has a HVX instruction. | ||
| // ii) -O3 | ||
| if (HST->useHVXOps()) { | ||
| if (HTM->getOptLevel() < CodeGenOptLevel::Default) | ||
| return false; | ||
| } else { | ||
| if (HTM->getOptLevel() < CodeGenOptLevel::Aggressive) | ||
| return false; | ||
| } | ||
|
|
||
| bool Changed = false; | ||
| for (MachineFunction::iterator MBBi = MF.begin(), MBBe = MF.end(); | ||
| MBBi != MBBe; ++MBBi) { | ||
| MachineBasicBlock &MBB = *MBBi; | ||
| Changed |= attemptToBalignSmallLoop(MF, MBB); | ||
| } | ||
| return Changed; | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| INITIALIZE_PASS(HexagonLoopAlign, "hexagon-loop-align", | ||
| "Hexagon LoopAlign pass", false, false) | ||
|
|
||
| //===----------------------------------------------------------------------===// | ||
| // Public Constructor Functions | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| FunctionPass *llvm::createHexagonLoopAlign() { return new HexagonLoopAlign(); } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| ; RUN: llc -march=hexagon -O3 < %s | FileCheck %s -check-prefix=BALIGN | ||
| ; BALIGN: .p2align{{.*}}5 | ||
|
|
||
| ; The test for checking the alignment of 'for.body4.for.body4_crit_edge' basic block | ||
|
|
||
| define dso_local void @foo(i32 %nCol, i32 %nRow, ptr nocapture %resMat) local_unnamed_addr { | ||
| entry: | ||
| %shl = shl i32 %nRow, 2 | ||
| %cmp36 = icmp sgt i32 %nRow, 0 | ||
| %0 = add i32 %nCol, -1 | ||
| %.inv = icmp slt i32 %0, 1 | ||
| %1 = select i1 %.inv, i32 1, i32 %nCol | ||
| br label %Outerloop | ||
|
|
||
| Outerloop: ; preds = %for.end7, %entry | ||
| %r12.0 = phi i32 [ 0, %entry ], [ %inc8, %for.end7 ] | ||
| %r7_6.0 = phi i64 [ undef, %entry ], [ %r7_6.1.lcssa, %for.end7 ] | ||
| %r0i.0 = phi i32 [ undef, %entry ], [ %r0i.1.lcssa, %for.end7 ] | ||
| %r5.0 = phi ptr [ %resMat, %entry ], [ %r5.1.lcssa, %for.end7 ] | ||
| %r8.0 = phi i32 [ %shl, %entry ], [ %r8.1.lcssa, %for.end7 ] | ||
| br i1 %cmp36, label %for.body.lr.ph, label %for.end7 | ||
|
|
||
| for.body.lr.ph: ; preds = %Outerloop | ||
| %cmp332 = icmp eq i32 %r12.0, 0 | ||
| %exitcond.peel = icmp eq i32 %r12.0, 1 | ||
| br label %for.body | ||
|
|
||
| for.body: ; preds = %for.end, %for.body.lr.ph | ||
| %r8.141 = phi i32 [ %r8.0, %for.body.lr.ph ], [ %add, %for.end ] | ||
| %r5.140 = phi ptr [ %r5.0, %for.body.lr.ph ], [ %add.ptr, %for.end ] | ||
| %i.039 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.end ] | ||
| %r0i.138 = phi i32 [ %r0i.0, %for.body.lr.ph ], [ %4, %for.end ] | ||
| %r7_6.137 = phi i64 [ %r7_6.0, %for.body.lr.ph ], [ %r7_6.2.lcssa, %for.end ] | ||
| %add = add nsw i32 %r8.141, %shl | ||
| br i1 %cmp332, label %for.end, label %for.body4.peel | ||
|
|
||
| for.body4.peel: ; preds = %for.body | ||
| %r1i.0.in.peel = inttoptr i32 %r8.141 to ptr | ||
| %r1i.0.peel = load i32, ptr %r1i.0.in.peel, align 4 | ||
| %2 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.137, i32 %r1i.0.peel, i32 %r0i.138) | ||
| br i1 %exitcond.peel, label %for.end, label %for.body4.preheader.peel.newph | ||
|
|
||
| for.body4.preheader.peel.newph: ; preds = %for.body4.peel | ||
| %r1i.0.in = inttoptr i32 %add to ptr | ||
| %r1i.0 = load i32, ptr %r1i.0.in, align 4 | ||
| br label %for.body4 | ||
|
|
||
| for.body4: ; preds = %for.body4.for.body4_crit_edge, %for.body4.preheader.peel.newph | ||
| %inc.phi = phi i32 [ %inc.0, %for.body4.for.body4_crit_edge ], [ 2, %for.body4.preheader.peel.newph ] | ||
| %r7_6.233 = phi i64 [ %3, %for.body4.for.body4_crit_edge ], [ %2, %for.body4.preheader.peel.newph ] | ||
| %3 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.233, i32 %r1i.0, i32 %r0i.138) | ||
| %exitcond = icmp eq i32 %inc.phi, %r12.0 | ||
| br i1 %exitcond, label %for.end.loopexit, label %for.body4.for.body4_crit_edge | ||
|
|
||
| for.body4.for.body4_crit_edge: ; preds = %for.body4 | ||
| %inc.0 = add nuw nsw i32 %inc.phi, 1 | ||
| br label %for.body4 | ||
|
|
||
| for.end.loopexit: ; preds = %for.body4 | ||
| br label %for.end | ||
|
|
||
| for.end: ; preds = %for.end.loopexit, %for.body4.peel, %for.body | ||
| %r7_6.2.lcssa = phi i64 [ %r7_6.137, %for.body ], [ %2, %for.body4.peel ], [ %3, %for.end.loopexit ] | ||
| %4 = tail call i32 @llvm.hexagon.S2.clbp(i64 %r7_6.2.lcssa) | ||
| store i32 %4, ptr %r5.140, align 4 | ||
| %add.ptr = getelementptr inbounds i8, ptr %r5.140, i32 undef | ||
| %inc6 = add nuw nsw i32 %i.039, 1 | ||
| %exitcond47 = icmp eq i32 %inc6, %nRow | ||
| br i1 %exitcond47, label %for.end7.loopexit, label %for.body | ||
|
|
||
| for.end7.loopexit: ; preds = %for.end | ||
| br label %for.end7 | ||
|
|
||
| for.end7: ; preds = %for.end7.loopexit, %Outerloop | ||
| %r7_6.1.lcssa = phi i64 [ %r7_6.0, %Outerloop ], [ %r7_6.2.lcssa, %for.end7.loopexit ] | ||
| %r0i.1.lcssa = phi i32 [ %r0i.0, %Outerloop ], [ %4, %for.end7.loopexit ] | ||
| %r5.1.lcssa = phi ptr [ %r5.0, %Outerloop ], [ %add.ptr, %for.end7.loopexit ] | ||
| %r8.1.lcssa = phi i32 [ %r8.0, %Outerloop ], [ %add, %for.end7.loopexit ] | ||
| %inc8 = add nuw i32 %r12.0, 1 | ||
| %exitcond48 = icmp eq i32 %inc8, %1 | ||
| br i1 %exitcond48, label %if.end, label %Outerloop | ||
|
|
||
| if.end: ; preds = %for.end7 | ||
| ret void | ||
| } | ||
|
|
||
| ; Function Attrs: nounwind readnone | ||
| declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32) | ||
|
|
||
| ; Function Attrs: nounwind readnone | ||
| declare i32 @llvm.hexagon.S2.clbp(i64) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| ; RUN: llc -march=hexagon -mcpu=hexagonv73 -O2 -mattr=+hvxv73,hvx-length64b \ | ||
| ; RUN: -debug-only=hexagon-loop-align 2>&1 < %s | FileCheck %s | ||
| ; Validate that there are 4 bundles in the loop. | ||
|
|
||
| ; CHECK: Loop Align Pass: | ||
| ; CHECK: Bundle Count : 4 | ||
| ; CHECK: .p2align{{.*}}5 | ||
|
|
||
| ; Function Attrs: nounwind | ||
| define void @ham(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 { | ||
| bb: | ||
| %ashr = ashr i32 %arg3, 2 | ||
| %ashr6 = ashr i32 %arg3, 1 | ||
| %add = add nsw i32 %ashr6, %ashr | ||
| %icmp = icmp sgt i32 %arg2, 0 | ||
| br i1 %icmp, label %bb7, label %bb61 | ||
|
|
||
| bb7: ; preds = %bb | ||
| %sdiv = sdiv i32 %arg1, 64 | ||
| %icmp8 = icmp sgt i32 %arg1, 63 | ||
| br label %bb9 | ||
|
|
||
| bb9: ; preds = %bb57, %bb7 | ||
| %phi = phi i32 [ 0, %bb7 ], [ %add58, %bb57 ] | ||
| %ashr10 = ashr exact i32 %phi, 1 | ||
| %mul = mul nsw i32 %ashr10, %arg3 | ||
| br i1 %icmp8, label %bb11, label %bb57 | ||
|
|
||
| bb11: ; preds = %bb9 | ||
| %add12 = add nsw i32 %phi, 1 | ||
| %mul13 = mul nsw i32 %add12, %arg5 | ||
| %mul14 = mul nsw i32 %phi, %arg5 | ||
| %add15 = add i32 %add, %mul | ||
| %add16 = add i32 %mul, %ashr | ||
| %add17 = add i32 %mul, %ashr6 | ||
| %getelementptr = getelementptr inbounds i8, ptr %arg4, i32 %mul13 | ||
| %getelementptr18 = getelementptr inbounds i8, ptr %arg4, i32 %mul14 | ||
| %getelementptr19 = getelementptr inbounds i16, ptr %arg, i32 %add15 | ||
| %getelementptr20 = getelementptr inbounds i16, ptr %arg, i32 %add16 | ||
| %getelementptr21 = getelementptr inbounds i16, ptr %arg, i32 %add17 | ||
| %getelementptr22 = getelementptr inbounds i16, ptr %arg, i32 %mul | ||
| %bitcast = bitcast ptr %getelementptr to ptr | ||
| %bitcast23 = bitcast ptr %getelementptr18 to ptr | ||
| %bitcast24 = bitcast ptr %getelementptr19 to ptr | ||
| %bitcast25 = bitcast ptr %getelementptr20 to ptr | ||
| %bitcast26 = bitcast ptr %getelementptr21 to ptr | ||
| %bitcast27 = bitcast ptr %getelementptr22 to ptr | ||
| br label %bb28 | ||
|
|
||
| bb28: ; preds = %bb28, %bb11 | ||
| %phi29 = phi i32 [ 0, %bb11 ], [ %add54, %bb28 ] | ||
| %phi30 = phi ptr [ %bitcast27, %bb11 ], [ %getelementptr36, %bb28 ] | ||
| %phi31 = phi ptr [ %bitcast26, %bb11 ], [ %getelementptr37, %bb28 ] | ||
| %phi32 = phi ptr [ %bitcast25, %bb11 ], [ %getelementptr39, %bb28 ] | ||
| %phi33 = phi ptr [ %bitcast24, %bb11 ], [ %getelementptr41, %bb28 ] | ||
| %phi34 = phi ptr [ %bitcast, %bb11 ], [ %getelementptr53, %bb28 ] | ||
| %phi35 = phi ptr [ %bitcast23, %bb11 ], [ %getelementptr52, %bb28 ] | ||
| %getelementptr36 = getelementptr inbounds <16 x i32>, ptr %phi30, i32 1 | ||
| %load = load <16 x i32>, ptr %phi30, align 64 | ||
| %getelementptr37 = getelementptr inbounds <16 x i32>, ptr %phi31, i32 1 | ||
| %load38 = load <16 x i32>, ptr %phi31, align 64 | ||
| %getelementptr39 = getelementptr inbounds <16 x i32>, ptr %phi32, i32 1 | ||
| %load40 = load <16 x i32>, ptr %phi32, align 64 | ||
| %getelementptr41 = getelementptr inbounds <16 x i32>, ptr %phi33, i32 1 | ||
| %load42 = load <16 x i32>, ptr %phi33, align 64 | ||
| %call = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load, <16 x i32> %load38) | ||
| %call43 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load, <16 x i32> %load38) | ||
| %call44 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load40, <16 x i32> %load42) | ||
| %call45 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load40, <16 x i32> %load42) | ||
| %call46 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call, <16 x i32> %call44) | ||
| %call47 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call, <16 x i32> %call44) | ||
| %call48 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call43, <16 x i32> %call45) | ||
| %call49 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call43, <16 x i32> %call45) | ||
| %call50 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call47, <16 x i32> %call46) | ||
| %call51 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call49, <16 x i32> %call48) | ||
| %getelementptr52 = getelementptr inbounds <16 x i32>, ptr %phi35, i32 1 | ||
| store <16 x i32> %call50, ptr %phi35, align 64 | ||
| %getelementptr53 = getelementptr inbounds <16 x i32>, ptr %phi34, i32 1 | ||
| store <16 x i32> %call51, ptr %phi34, align 64 | ||
| %add54 = add nsw i32 %phi29, 1 | ||
| %icmp55 = icmp slt i32 %add54, %sdiv | ||
| br i1 %icmp55, label %bb28, label %bb56 | ||
|
|
||
| bb56: ; preds = %bb28 | ||
| br label %bb57 | ||
|
|
||
| bb57: ; preds = %bb56, %bb9 | ||
| %add58 = add nsw i32 %phi, 2 | ||
| %icmp59 = icmp slt i32 %add58, %arg2 | ||
| br i1 %icmp59, label %bb9, label %bb60 | ||
|
|
||
| bb60: ; preds = %bb57 | ||
| br label %bb61 | ||
|
|
||
| bb61: ; preds = %bb60, %bb | ||
| ret void | ||
| } | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| # RUN: llc -march=hexagon -O3 -run-pass hexagon-loop-align -o - %s\ | ||
| # RUN: -debug-only=hexagon-loop-align -verify-machineinstrs 2>&1 | FileCheck %s | ||
|
|
||
| # Test that we only count instructions up to the endloop instruction and | ||
| # that we align this loop to 32 bytes. | ||
| # CHECK: Loop Align Pass: | ||
| # CHECK: Instruction Count : 16 | ||
| # CHECK: bb.5 (align 32) | ||
| --- | ||
| name: fred | ||
| tracksRegLiveness: true | ||
|
|
||
| body: | | ||
| bb.0: | ||
| successors: %bb.1(0x50000000), %bb.8(0x30000000) | ||
| liveins: $r0, $r1, $r2, $r3, $r4, $r5 | ||
| renamable $p0 = C2_cmpgti renamable $r2, 0 | ||
| J2_jumpf killed renamable $p0, %bb.8, implicit-def dead $pc | ||
| J2_jump %bb.1, implicit-def dead $pc | ||
| bb.1: | ||
| successors: %bb.2(0x80000000) | ||
| liveins: $r0, $r1, $r2, $r3, $r4, $r5 | ||
| renamable $r7 = A2_addi killed renamable $r2, 1 | ||
| renamable $r8 = S2_asr_i_r renamable $r1, 31 | ||
| renamable $p0 = C2_cmpgti renamable $r1, 63 | ||
| renamable $r2 = S2_asr_i_r renamable $r3, 2 | ||
| renamable $r6 = S2_asr_i_r renamable $r3, 1 | ||
| renamable $r9 = S2_lsr_i_r killed renamable $r7, 1 | ||
| renamable $r1 = S2_lsr_i_r_acc killed renamable $r1, killed renamable $r8, 26 | ||
| renamable $r7 = A2_tfrsi 0 | ||
| renamable $r1 = S2_asr_i_r killed renamable $r1, 6 | ||
| J2_loop1r %bb.2, killed renamable $r9, implicit-def $lc1, implicit-def $sa1 | ||
| renamable $r8 = nsw A2_add renamable $r6, renamable $r2 | ||
| bb.2: | ||
| successors: %bb.3(0x40000000), %bb.7(0x40000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8 | ||
| J2_jumpf renamable $p0, %bb.7, implicit-def dead $pc | ||
| J2_jump %bb.3, implicit-def dead $pc | ||
| bb.3: | ||
| successors: %bb.4(0x80000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8 | ||
| renamable $r13 = exact S2_asr_i_r renamable $r7, 1 | ||
| renamable $r12 = COPY renamable $r4 | ||
| renamable $r9 = COPY renamable $r4 | ||
| renamable $r14 = nsw A2_addi renamable $r7, 1 | ||
| renamable $r15 = nsw M2_mpyi killed renamable $r13, renamable $r3 | ||
| renamable $r9 = M2_maci killed renamable $r9, killed renamable $r14, renamable $r5 | ||
| renamable $r13 = A2_add renamable $r8, renamable $r15 | ||
| renamable $r28 = A2_add renamable $r15, renamable $r2 | ||
| renamable $r10 = A2_add renamable $r15, renamable $r6 | ||
| renamable $r12 = M2_maci killed renamable $r12, renamable $r7, renamable $r5 | ||
| renamable $r13 = S2_addasl_rrri renamable $r0, killed renamable $r13, 1 | ||
| renamable $r14 = S2_addasl_rrri renamable $r0, killed renamable $r15, 1 | ||
| renamable $r15 = S2_addasl_rrri renamable $r0, killed renamable $r28, 1 | ||
| renamable $r28 = S2_addasl_rrri renamable $r0, killed renamable $r10, 1 | ||
| bb.4: | ||
| successors: %bb.5(0x40000000), %bb.6(0x40000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r13, $r14, $r15, $r28 | ||
| renamable $v0, renamable $r14 = V6_vL32b_pi killed renamable $r14, 64 | ||
| renamable $p1 = C2_cmpgtui renamable $r1, 1 | ||
| renamable $r10 = A2_addi renamable $r1, -1 | ||
| renamable $v2, renamable $r28 = V6_vL32b_pi killed renamable $r28, 64 | ||
| renamable $v1 = V6_vaddh renamable $v0, renamable $v2 | ||
| renamable $v3, renamable $r15 = V6_vL32b_pi killed renamable $r15, 64 | ||
| renamable $v0 = V6_vsubh killed renamable $v0, killed renamable $v2 | ||
| J2_loop0r %bb.5, killed renamable $r10, implicit-def $lc0, implicit-def $sa0, implicit-def $usr | ||
| renamable $v4, renamable $r13 = V6_vL32b_pi killed renamable $r13, 64 | ||
| renamable $v2 = V6_vaddh renamable $v3, renamable $v4 | ||
| J2_jumpf killed renamable $p1, %bb.6, implicit-def $pc | ||
| J2_jump %bb.5, implicit-def $pc | ||
| bb.5: | ||
| successors: %bb.5(0x7c000000), %bb.6(0x04000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r13, $r14, $r15, $r28, $v0, $v1, $v2, $v3, $v4 | ||
| renamable $v3 = V6_vsubh killed renamable $v3, killed renamable $v4 | ||
| renamable $v4, renamable $r14 = V6_vL32b_pi killed renamable $r14, 64 | ||
| renamable $v5 = V6_vnavgh renamable $v1, renamable $v2 | ||
| renamable $v1 = V6_vavgh killed renamable $v1, killed renamable $v2 | ||
| renamable $v2, renamable $r28 = V6_vL32b_pi killed renamable $r28, 64 | ||
| renamable $v1 = V6_vsathub killed renamable $v5, killed renamable $v1 | ||
| renamable $v5 = V6_vnavgh renamable $v0, renamable $v3 | ||
| renamable $v6 = V6_vavgh killed renamable $v0, killed renamable $v3 | ||
| renamable $r12 = V6_vS32b_pi killed renamable $r12, 64, killed renamable $v1 | ||
| renamable $v1 = V6_vaddh renamable $v4, renamable $v2 | ||
| renamable $v3, renamable $r15 = V6_vL32b_pi killed renamable $r15, 64 | ||
| renamable $v0 = V6_vsubh killed renamable $v4, killed renamable $v2 | ||
| renamable $v4, renamable $r13 = V6_vL32b_pi killed renamable $r13, 64 | ||
| renamable $v2 = V6_vaddh renamable $v3, renamable $v4 | ||
| renamable $v5 = V6_vsathub killed renamable $v5, killed renamable $v6 | ||
| renamable $r9 = V6_vS32b_pi killed renamable $r9, 64, killed renamable $v5 | ||
| ENDLOOP0 %bb.5, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 | ||
| J2_jump %bb.6, implicit-def $pc | ||
| bb.6: | ||
| successors: %bb.7(0x80000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $v0, $v1, $v2, $v3, $v4 | ||
| renamable $v3 = V6_vsubh killed renamable $v3, killed renamable $v4 | ||
| renamable $v4 = V6_vavgh renamable $v1, renamable $v2 | ||
| renamable $v1 = V6_vnavgh killed renamable $v1, killed renamable $v2 | ||
| renamable $v2 = V6_vavgh renamable $v0, renamable $v3 | ||
| renamable $v0 = V6_vnavgh killed renamable $v0, killed renamable $v3 | ||
| renamable $v1 = V6_vsathub killed renamable $v1, killed renamable $v4 | ||
| dead renamable $r12 = V6_vS32b_pi killed renamable $r12, 64, killed renamable $v1 | ||
| renamable $v0 = V6_vsathub killed renamable $v0, killed renamable $v2 | ||
| dead renamable $r9 = V6_vS32b_pi killed renamable $r9, 64, killed renamable $v0 | ||
| J2_jump %bb.7, implicit-def $pc | ||
| bb.7: | ||
| successors: %bb.2(0x7c000000), %bb.8(0x04000000) | ||
| liveins: $p0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8 | ||
| renamable $r7 = nsw A2_addi killed renamable $r7, 2 | ||
| ENDLOOP1 %bb.2, implicit-def $pc, implicit-def $lc1, implicit $sa1, implicit $lc1 | ||
| J2_jump %bb.8, implicit-def dead $pc | ||
| bb.8: | ||
| PS_jmpret $r31, implicit-def dead $pc | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| ; RUN: llc -march=hexagon -mcpu=hexagonv73 -O2 -mattr=+hvxv73,hvx-length64b < %s | FileCheck %s | ||
| ; CHECK: .p2align{{.*}}5 | ||
|
|
||
| ; Function Attrs: nounwind | ||
| define void @wobble(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 { | ||
| bb: | ||
| %ashr = ashr i32 %arg3, 2 | ||
| %ashr6 = ashr i32 %arg3, 1 | ||
| %add = add nsw i32 %ashr6, %ashr | ||
| %icmp = icmp sgt i32 %arg2, 0 | ||
| br i1 %icmp, label %bb7, label %bb61 | ||
|
|
||
| bb7: ; preds = %bb | ||
| %sdiv = sdiv i32 %arg1, 64 | ||
| %icmp8 = icmp sgt i32 %arg1, 63 | ||
| br label %bb9 | ||
|
|
||
| bb9: ; preds = %bb57, %bb7 | ||
| %phi = phi i32 [ 0, %bb7 ], [ %add58, %bb57 ] | ||
| %ashr10 = ashr exact i32 %phi, 1 | ||
| %mul = mul nsw i32 %ashr10, %arg3 | ||
| br i1 %icmp8, label %bb11, label %bb57 | ||
|
|
||
| bb11: ; preds = %bb9 | ||
| %add12 = add nsw i32 %phi, 1 | ||
| %mul13 = mul nsw i32 %add12, %arg5 | ||
| %mul14 = mul nsw i32 %phi, %arg5 | ||
| %add15 = add i32 %add, %mul | ||
| %add16 = add i32 %mul, %ashr | ||
| %add17 = add i32 %mul, %ashr6 | ||
| %getelementptr = getelementptr inbounds i8, ptr %arg4, i32 %mul13 | ||
| %getelementptr18 = getelementptr inbounds i8, ptr %arg4, i32 %mul14 | ||
| %getelementptr19 = getelementptr inbounds i16, ptr %arg, i32 %add15 | ||
| %getelementptr20 = getelementptr inbounds i16, ptr %arg, i32 %add16 | ||
| %getelementptr21 = getelementptr inbounds i16, ptr %arg, i32 %add17 | ||
| %getelementptr22 = getelementptr inbounds i16, ptr %arg, i32 %mul | ||
| %bitcast = bitcast ptr %getelementptr to ptr | ||
| %bitcast23 = bitcast ptr %getelementptr18 to ptr | ||
| %bitcast24 = bitcast ptr %getelementptr19 to ptr | ||
| %bitcast25 = bitcast ptr %getelementptr20 to ptr | ||
| %bitcast26 = bitcast ptr %getelementptr21 to ptr | ||
| %bitcast27 = bitcast ptr %getelementptr22 to ptr | ||
| br label %bb28 | ||
|
|
||
| bb28: ; preds = %bb28, %bb11 | ||
| %phi29 = phi i32 [ 0, %bb11 ], [ %add54, %bb28 ] | ||
| %phi30 = phi ptr [ %bitcast27, %bb11 ], [ %getelementptr36, %bb28 ] | ||
| %phi31 = phi ptr [ %bitcast26, %bb11 ], [ %getelementptr37, %bb28 ] | ||
| %phi32 = phi ptr [ %bitcast25, %bb11 ], [ %getelementptr39, %bb28 ] | ||
| %phi33 = phi ptr [ %bitcast24, %bb11 ], [ %getelementptr41, %bb28 ] | ||
| %phi34 = phi ptr [ %bitcast, %bb11 ], [ %getelementptr53, %bb28 ] | ||
| %phi35 = phi ptr [ %bitcast23, %bb11 ], [ %getelementptr52, %bb28 ] | ||
| %getelementptr36 = getelementptr inbounds <16 x i32>, ptr %phi30, i32 1 | ||
| %load = load <16 x i32>, ptr %phi30, align 64, !tbaa !1 | ||
| %getelementptr37 = getelementptr inbounds <16 x i32>, ptr %phi31, i32 1 | ||
| %load38 = load <16 x i32>, ptr %phi31, align 64, !tbaa !1 | ||
| %getelementptr39 = getelementptr inbounds <16 x i32>, ptr %phi32, i32 1 | ||
| %load40 = load <16 x i32>, ptr %phi32, align 64, !tbaa !1 | ||
| %getelementptr41 = getelementptr inbounds <16 x i32>, ptr %phi33, i32 1 | ||
| %load42 = load <16 x i32>, ptr %phi33, align 64, !tbaa !1 | ||
| %call = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load, <16 x i32> %load38) | ||
| %call43 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load, <16 x i32> %load38) | ||
| %call44 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load40, <16 x i32> %load42) | ||
| %call45 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load40, <16 x i32> %load42) | ||
| %call46 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call, <16 x i32> %call44) | ||
| %call47 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call, <16 x i32> %call44) | ||
| %call48 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call43, <16 x i32> %call45) | ||
| %call49 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call43, <16 x i32> %call45) | ||
| %call50 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call47, <16 x i32> %call46) | ||
| %call51 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call49, <16 x i32> %call48) | ||
| %getelementptr52 = getelementptr inbounds <16 x i32>, ptr %phi35, i32 1 | ||
| store <16 x i32> %call50, ptr %phi35, align 64, !tbaa !1 | ||
| %getelementptr53 = getelementptr inbounds <16 x i32>, ptr %phi34, i32 1 | ||
| store <16 x i32> %call51, ptr %phi34, align 64, !tbaa !1 | ||
| %add54 = add nsw i32 %phi29, 1 | ||
| %icmp55 = icmp slt i32 %add54, %sdiv | ||
| br i1 %icmp55, label %bb28, label %bb56 | ||
|
|
||
| bb56: ; preds = %bb28 | ||
| br label %bb57 | ||
|
|
||
| bb57: ; preds = %bb56, %bb9 | ||
| %add58 = add nsw i32 %phi, 2 | ||
| %icmp59 = icmp slt i32 %add58, %arg2 | ||
| br i1 %icmp59, label %bb9, label %bb60 | ||
|
|
||
| bb60: ; preds = %bb57 | ||
| br label %bb61 | ||
|
|
||
| bb61: ; preds = %bb60, %bb | ||
| ret void | ||
| } | ||
|
|
||
| ; Declarations of the llvm.hexagon.V6.* intrinsics called in the %bb28 loop body. | ||
| ; Each takes two <16 x i32> operands, returns <16 x i32>, and uses attribute group #1. | ||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) | ||
| declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1 | ||
|
|
||
| ; Attribute groups. #1 is the group referenced by the llvm.hexagon.V6.* declarations above. | ||
| ; NOTE(review): #0 is not referenced in the visible part of this file; presumably it is | ||
| ; attached to the function definition earlier in the file -- confirm before editing it. | ||
| attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } | ||
|
|
||
| ; Producer identification string (node !0). | ||
| !llvm.ident = !{!0} | ||
|
|
||
| !0 = !{!"Clang 3.1"} | ||
| ; TBAA metadata: !1 is the access tag attached via "!tbaa !1" to the vector loads and | ||
| ; stores; it names type node !2 ("omnipotent char") as both base and access type at | ||
| ; offset 0. !2's parent is !3, the TBAA root. | ||
| !1 = !{!2, !2, i64 0} | ||
| !2 = !{!"omnipotent char", !3, i64 0} | ||
| !3 = !{!"Simple C/C++ TBAA"} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 | ||
|
|
||
| ;; Check that -enable-loop-header-duplication at Oz enables certain types of | ||
| ;; optimizations, for example replacing the loop body with a call to memset. | ||
| ;; If loop idiom recognition begins to recognize unrotated loops, this test | ||
| ;; will need to be updated. | ||
|
|
||
| ; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION | ||
| ; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION | ||
| ; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION | ||
|
|
||
| ; Byte-fill loop: each iteration stores i8 1 through %ptr.iv and advances it by one | ||
| ; byte until it compares equal to %end. The exit test sits in the loop header, so the | ||
| ; input loop is unrotated. The ROTATION checks expect the loop to be replaced by a | ||
| ; memset call; the NOROTATION checks expect it to remain a loop. | ||
| ; The input uses opaque pointers (ptr) to match the autogenerated CHECK lines. | ||
| define void @test(ptr noalias nonnull align 1 %start, ptr %end) unnamed_addr { | ||
| ; NOROTATION-LABEL: define void @test( | ||
| ; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] { | ||
| ; NOROTATION-NEXT: entry: | ||
| ; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]] | ||
| ; NOROTATION: loop.header: | ||
| ; NOROTATION-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] | ||
| ; NOROTATION-NEXT: [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]] | ||
| ; NOROTATION-NEXT: br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]] | ||
| ; NOROTATION: loop.latch: | ||
| ; NOROTATION-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1 | ||
| ; NOROTATION-NEXT: store i8 1, ptr [[PTR_IV]], align 1 | ||
| ; NOROTATION-NEXT: br label [[LOOP_HEADER]] | ||
| ; NOROTATION: exit: | ||
| ; NOROTATION-NEXT: ret void | ||
| ; | ||
| ; ROTATION-LABEL: define void @test( | ||
| ; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] { | ||
| ; ROTATION-NEXT: entry: | ||
| ; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]] | ||
| ; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]] | ||
| ; ROTATION: loop.latch.preheader: | ||
| ; ROTATION-NEXT: [[END3:%.*]] = ptrtoint ptr [[END]] to i64 | ||
| ; ROTATION-NEXT: [[START4:%.*]] = ptrtoint ptr [[START]] to i64 | ||
| ; ROTATION-NEXT: [[TMP0:%.*]] = sub i64 [[END3]], [[START4]] | ||
| ; ROTATION-NEXT: tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false) | ||
| ; ROTATION-NEXT: br label [[EXIT]] | ||
| ; ROTATION: exit: | ||
| ; ROTATION-NEXT: ret void | ||
| ; | ||
| entry: | ||
| br label %loop.header | ||
|
|
||
| ; Header holds the exit condition: leave once the cursor reaches %end. | ||
| loop.header: | ||
| %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] | ||
| %_12.i = icmp eq ptr %ptr.iv, %end | ||
| br i1 %_12.i, label %exit, label %loop.latch | ||
|
|
||
| ; Latch writes one byte at the cursor and branches back unconditionally. | ||
| loop.latch: | ||
| %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 1 | ||
| store i8 1, ptr %ptr.iv, align 1 | ||
| br label %loop.header | ||
|
|
||
| exit: | ||
| ret void | ||
| } |