Skip to content

Commit

Permalink
[Hexagon] Generate loop1 instruction for nested loops
Browse files Browse the repository at this point in the history
loop1 is for the outer loop and loop0 is for the inner loop.

Differential Revision: http://reviews.llvm.org/D9680

llvm-svn: 237266
  • Loading branch information
Brendon Cahoon committed May 13, 2015
1 parent ffc84e3 commit d11c92a
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 56 deletions.
139 changes: 83 additions & 56 deletions llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
Expand Up @@ -159,7 +159,7 @@ namespace {
MachineOperand *InitialValue,
const MachineOperand *Endvalue,
int64_t IVBump) const;

/// \brief Analyze the statements in a loop to determine if the loop
/// has a computable trip count and, if so, return a value that represents
/// the trip count expression.
Expand All @@ -179,15 +179,16 @@ namespace {

/// \brief Return true if the instruction is not valid within a hardware
/// loop.
bool isInvalidLoopOperation(const MachineInstr *MI) const;
bool isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const;

/// \brief Return true if the loop contains an instruction that inhibits
/// using the hardware loop.
bool containsInvalidInstruction(MachineLoop *L) const;
bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;

/// \brief Given a loop, check if we can convert it to a hardware loop.
/// If so, then perform the conversion and return true.
bool convertToHardwareLoop(MachineLoop *L);
bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);

/// \brief Return true if the instruction is now dead.
bool isDead(const MachineInstr *MI,
Expand Down Expand Up @@ -307,18 +308,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
"Hexagon Hardware Loops", false, false)


/// \brief Tells whether \p MI is a loop0 hardware loop setup instruction,
/// in either its register (J2_loop0r) or immediate (J2_loop0i) form.
static bool isHardwareLoop(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case Hexagon::J2_loop0r:
  case Hexagon::J2_loop0i:
    return true;
  default:
    return false;
  }
}

/// Factory for the Hexagon hardware loops pass: allocates a new
/// HexagonHardwareLoops instance and returns it as a FunctionPass pointer.
FunctionPass *llvm::createHexagonHardwareLoops() {
  FunctionPass *HWLoopsPass = new HexagonHardwareLoops();
  return HWLoopsPass;
}


bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");

Expand All @@ -329,12 +322,12 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MDT = &getAnalysis<MachineDominatorTree>();
TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
MachineLoop *L = *I;
if (!L->getParentLoop())
Changed |= convertToHardwareLoop(L);
}
for (auto &L : *MLI)
if (!L->getParentLoop()) {
bool L0Used = false;
bool L1Used = false;
Changed |= convertToHardwareLoop(L, L0Used, L1Used);
}

return Changed;
}
Expand Down Expand Up @@ -467,27 +460,27 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpeqp:
Cmp = Comparison::Kind::EQ;
Cmp = Comparison::EQ;
break;
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmpneqi:
Cmp = Comparison::Kind::NE;
Cmp = Comparison::NE;
break;
case Hexagon::C4_cmplte:
Cmp = Comparison::Kind::LEs;
Cmp = Comparison::LEs;
break;
case Hexagon::C4_cmplteu:
Cmp = Comparison::Kind::LEu;
Cmp = Comparison::LEu;
break;
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
Cmp = Comparison::Kind::GTu;
Cmp = Comparison::GTu;
break;
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
Cmp = Comparison::Kind::GTs;
Cmp = Comparison::GTs;
break;
default:
return (Comparison::Kind)0;
Expand Down Expand Up @@ -749,7 +742,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
DebugLoc DL;
if (InsertPos != PH->end())
InsertPos->getDebugLoc();
DL = InsertPos->getDebugLoc();

// If Start is an immediate and End is a register, the trip count
// will be "reg - imm". Hexagon's "subtract immediate" instruction
Expand Down Expand Up @@ -828,7 +821,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
(RegToImm ? TII->get(Hexagon::A2_subri) :
TII->get(Hexagon::A2_addi));
if (RegToReg || RegToImm) {
if (RegToReg || RegToImm) {
unsigned SubR = MRI->createVirtualRegister(IntRC);
MachineInstrBuilder SubIB =
BuildMI(*PH, InsertPos, DL, SubD, SubR);
Expand Down Expand Up @@ -902,51 +895,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
return new CountValue(CountValue::CV_Register, CountR, CountSR);
}


/// \brief Return true if the operation is invalid within hardware loop.
bool HexagonHardwareLoops::isInvalidLoopOperation(
const MachineInstr *MI) const {
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const {

// A call is not allowed because the callee may itself use a hardware loop.
// The only exception is a call that never returns.
if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
return true;

// do not allow nested hardware loops
if (isHardwareLoop(MI))
return true;

// check if the instruction defines a hardware loop register
// Check if the instruction defines a hardware loop register.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned R = MO.getReg();
if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
R == Hexagon::SA0 || R == Hexagon::SA1)
if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
R == Hexagon::LC1 || R == Hexagon::SA1))
return true;
if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
return true;
}
return false;
}


/// \brief - Return true if the loop contains an instruction that inhibits
/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
/// \brief Return true if the loop contains an instruction that inhibits
/// the use of the hardware loop instruction.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool IsInnerHWLoop) const {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
if (isInvalidLoopOperation(MI))
if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
return true;
}
}
}
return false;
}


/// \brief Returns true if the instruction is dead. This was essentially
/// copied from DeadMachineInstructionElim::isDead, but with special cases
/// for inline asm, physical registers and instructions with side effects
Expand Down Expand Up @@ -1041,19 +1033,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
///
/// The code makes several assumptions about the representation of the loop
/// in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
bool &RecL0used,
bool &RecL1used) {
// This is just for sanity.
assert(L->getHeader() && "Loop without a header?");

bool Changed = false;
bool L0Used = false;
bool L1Used = false;

// Process nested loops first.
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
Changed |= convertToHardwareLoop(*I);
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
L0Used |= RecL0used;
L1Used |= RecL1used;
}

// If nested loops have already claimed both hardware loops (loop0 and
// loop1), then we can't convert this loop.
if (Changed)
if (Changed && L0Used && L1Used)
return Changed;

unsigned LOOP_i;
unsigned LOOP_r;
unsigned ENDLOOP;

// Flag used to track loopN instruction:
// 1 - Hardware loop is being generated for the inner most loop.
// 0 - Hardware loop is being generated for the outer loop.
unsigned IsInnerHWLoop = 1;

if (L0Used) {
LOOP_i = Hexagon::J2_loop1i;
LOOP_r = Hexagon::J2_loop1r;
ENDLOOP = Hexagon::ENDLOOP1;
IsInnerHWLoop = 0;
} else {
LOOP_i = Hexagon::J2_loop0i;
LOOP_r = Hexagon::J2_loop0r;
ENDLOOP = Hexagon::ENDLOOP0;
}

#ifndef NDEBUG
// Stop trying after reaching the limit (if any).
int Limit = HWLoopLimit;
Expand All @@ -1065,10 +1085,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
#endif

// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L))
if (containsInvalidInstruction(L, IsInnerHWLoop))
return false;

MachineBasicBlock *LastMBB = L->getExitingBlock();
MachineBasicBlock *LastMBB = getExitingBlock(L);
// Don't generate hw loop if the loop has more than one exit.
if (!LastMBB)
return false;
Expand Down Expand Up @@ -1141,23 +1161,22 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
.addMBB(LoopStart)
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
.addReg(CountReg);
} else {
assert(TripCount->isImm() && "Expecting immediate value for trip count");
// Add the Loop immediate instruction to the beginning of the loop,
// if the immediate fits in the instructions. Otherwise, we need to
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
if (!TII->isValidOffset(LOOP_i, CountImm)) {
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
.addMBB(LoopStart).addReg(CountReg);
} else
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
.addMBB(LoopStart).addImm(CountImm);
}

Expand All @@ -1171,8 +1190,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {

// Replace the loop branch with an endloop instruction.
DebugLoc LastIDL = LastI->getDebugLoc();
BuildMI(*LastMBB, LastI, LastIDL,
TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);

// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
Expand Down Expand Up @@ -1200,6 +1218,15 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
removeIfDead(OldInsts[i]);

++NumHWLoops;

// Set RecL1used and RecL0used only after the hardware loop has been
// successfully generated. Doing it earlier can cause the wrong loop
// instruction to be used.
if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
RecL1used = true;
else
RecL0used = true;

return true;
}

Expand Down Expand Up @@ -1533,7 +1560,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
// the loop latch. Any additional predecessors of the header should
// join at the newly created preheader. Inspect all PHI nodes from the
// join at the newly created preheader. Inspect all PHI nodes from the
// header and create appropriate corresponding PHI nodes in the preheader.

for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/Hexagon/hwloop-loop1.ll
@@ -0,0 +1,68 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
;
; Generate loop1 instruction for double loop sequence.

; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
; CHECK: endloop0
; CHECK: loop1(.LBB{{.}}_{{.}}, #100)
; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
; CHECK: endloop0
; CHECK: endloop1

; Test input with two loop structures in sequence: a single-level loop
; (for.body) followed by a two-level loop nest (for.cond7.preheader as the
; outer loop, for.body9 as the inner loop).
define i32 @main() #0 {
; Allocate a 100-element array and a 100x100 array on the stack, start their
; lifetimes, and precompute the addresses of doublearray[10][10] and array[0].
entry:
%array = alloca [100 x i32], align 8
%doublearray = alloca [100 x [100 x i32]], align 8
%0 = bitcast [100 x i32]* %array to i8*
call void @llvm.lifetime.start(i64 400, i8* %0) #1
%1 = bitcast [100 x [100 x i32]]* %doublearray to i8*
call void @llvm.lifetime.start(i64 40000, i8* %1) #1
%arrayidx1 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 10, i32 10
%arrayidx2.gep = getelementptr [100 x i32], [100 x i32]* %array, i32 0, i32 0
br label %for.body

; Header of the single-level loop. The counter %i.030 starts at 1 and the loop
; exits once it equals 100. %add accumulates the previously loaded element
; into the running sum; both start as undef on entry.
for.body:
%2 = phi i32 [ undef, %entry ], [ %.pre, %for.body.for.body_crit_edge ]
%sum.031 = phi i32 [ undef, %entry ], [ %add, %for.body.for.body_crit_edge ]
%arrayidx2.phi = phi i32* [ %arrayidx2.gep, %entry ], [ %arrayidx2.inc, %for.body.for.body_crit_edge ]
%i.030 = phi i32 [ 1, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
%add = add nsw i32 %2, %sum.031
%exitcond33 = icmp eq i32 %i.030, 100
%arrayidx2.inc = getelementptr i32, i32* %arrayidx2.phi, i32 1
br i1 %exitcond33, label %for.cond7.preheader.preheader, label %for.body.for.body_crit_edge

; Exit block of the first loop; falls through into the loop nest.
for.cond7.preheader.preheader:
br label %for.cond7.preheader

; Latch of the single-level loop: load the next array element and bump the
; counter before branching back to the header.
for.body.for.body_crit_edge:
%.pre = load i32, i32* %arrayidx2.inc, align 4
%phitmp = add i32 %i.030, 1
br label %for.body

; Outer loop header of the nest; %i.129 starts at 0.
for.cond7.preheader:
%i.129 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond7.preheader.preheader ]
br label %for.body9

; Inner loop: store %add into doublearray[i][j]; %j.028 starts at 0 and the
; loop exits once the incremented value equals 100.
for.body9:
%j.028 = phi i32 [ 0, %for.cond7.preheader ], [ %inc13, %for.body9 ]
%arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 %i.129, i32 %j.028
store i32 %add, i32* %arrayidx11, align 4
%inc13 = add nsw i32 %j.028, 1
%exitcond = icmp eq i32 %inc13, 100
br i1 %exitcond, label %for.inc15, label %for.body9

; Outer loop latch: increment %i.129 and exit once the new value equals 100.
for.inc15:
%inc16 = add nsw i32 %i.129, 1
%exitcond32 = icmp eq i32 %inc16, 100
br i1 %exitcond32, label %for.end17, label %for.cond7.preheader

; Read back doublearray[10][10], end both lifetimes, and return the value.
for.end17:
%3 = load i32, i32* %arrayidx1, align 8
call void @llvm.lifetime.end(i64 40000, i8* %1) #1
call void @llvm.lifetime.end(i64 400, i8* %0) #1
ret i32 %3
}

declare void @llvm.lifetime.start(i64, i8* nocapture) #1

declare void @llvm.lifetime.end(i64, i8* nocapture) #1

0 comments on commit d11c92a

Please sign in to comment.