1,064 changes: 1,064 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp

Large diffs are not rendered by default.

95 changes: 78 additions & 17 deletions llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
}
bool
HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
const HexagonRegisterInfo &TRI = getRegisterInfo();
const HexagonRegisterInfo &HRI = getRegisterInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
Expand All @@ -693,7 +693,7 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (Opc) {
case Hexagon::ALIGNA:
BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
.addReg(TRI.getFrameRegister())
.addReg(HRI.getFrameRegister())
.addImm(-MI->getOperand(1).getImm());
MBB.erase(MI);
return true;
Expand All @@ -718,15 +718,15 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
Expand All @@ -741,17 +741,17 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
unsigned Src3Reg = MI->getOperand(3).getReg();
unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi).addReg(Src3SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo).addReg(Src3SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
Expand All @@ -762,6 +762,30 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MRI.clearKillFlags(Src3SubLo);
return true;
}
case Hexagon::MUX64_rr: {
// Expand the MUX64_rr pseudo (operands: Rd, Pu, Rs, Rt) into at most two
// instructions that write Rd: an A2_tfrpt reading Pu and Rs, followed by an
// A2_tfrpf reading Pu and Rt. Presumably these are the predicated-true and
// predicated-false 64-bit register transfers, i.e. Rd = Pu ? Rs : Rt --
// confirm against the instruction definitions.
const MachineOperand &Op0 = MI->getOperand(0);
const MachineOperand &Op1 = MI->getOperand(1);
const MachineOperand &Op2 = MI->getOperand(2);
const MachineOperand &Op3 = MI->getOperand(3);
unsigned Rd = Op0.getReg();
unsigned Pu = Op1.getReg();
unsigned Rs = Op2.getReg();
unsigned Rt = Op3.getReg();
// NOTE(review): this redeclares a DL that appears to shadow the
// function-level DL, which is initialized from the same expression.
DebugLoc DL = MI->getDebugLoc();
// Carry the kill flags of the pseudo's predicate and source operands over
// to the instructions that replace it.
unsigned K1 = getKillRegState(Op1.isKill());
unsigned K2 = getKillRegState(Op2.isKill());
unsigned K3 = getKillRegState(Op3.isKill());
// A transfer whose source register already is Rd is not emitted. Pu gets
// its kill flag only on the last instruction that reads it: on the
// A2_tfrpt when the A2_tfrpf is skipped (Rd == Rt), otherwise on the
// A2_tfrpf.
if (Rd != Rs)
BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
.addReg(Pu, (Rd == Rt) ? K1 : 0)
.addReg(Rs, K2);
if (Rd != Rt)
BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
.addReg(Pu, K1)
.addReg(Rt, K3);
// The pseudo has been fully replaced (or was a no-op when Rd == Rs == Rt).
MBB.erase(MI);
return true;
}
case Hexagon::TCRETURNi:
MI->setDesc(get(Hexagon::J2_jump));
return true;
Expand Down Expand Up @@ -1285,7 +1309,44 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::TFR_FIA:
case Hexagon::INLINEASM:
return true;
}

// Offset ranges for what the opcode names suggest are predicated loads and
// stores plus store-immediate forms, grouped by access size (byte, halfword,
// word, doubleword) -- confirm against the instruction definitions. Each
// group accepts a 6-bit unsigned field scaled by a power of two.

// Unscaled: Offset must fit in 6 unsigned bits (0..63).
case Hexagon::L2_ploadrbt_io:
case Hexagon::L2_ploadrbf_io:
case Hexagon::L2_ploadrubt_io:
case Hexagon::L2_ploadrubf_io:
case Hexagon::S2_pstorerbt_io:
case Hexagon::S2_pstorerbf_io:
case Hexagon::S4_storeirb_io:
case Hexagon::S4_storeirbt_io:
case Hexagon::S4_storeirbf_io:
return isUInt<6>(Offset);

// Scaled by 2: Offset must be a multiple of 2 in 0..126.
case Hexagon::L2_ploadrht_io:
case Hexagon::L2_ploadrhf_io:
case Hexagon::L2_ploadruht_io:
case Hexagon::L2_ploadruhf_io:
case Hexagon::S2_pstorerht_io:
case Hexagon::S2_pstorerhf_io:
case Hexagon::S4_storeirh_io:
case Hexagon::S4_storeirht_io:
case Hexagon::S4_storeirhf_io:
return isShiftedUInt<6,1>(Offset);

// Scaled by 4: Offset must be a multiple of 4 in 0..252.
case Hexagon::L2_ploadrit_io:
case Hexagon::L2_ploadrif_io:
case Hexagon::S2_pstorerit_io:
case Hexagon::S2_pstorerif_io:
case Hexagon::S4_storeiri_io:
case Hexagon::S4_storeirit_io:
case Hexagon::S4_storeirif_io:
return isShiftedUInt<6,2>(Offset);

// Scaled by 8: Offset must be a multiple of 8 in 0..504.
case Hexagon::L2_ploadrdt_io:
case Hexagon::L2_ploadrdf_io:
case Hexagon::S2_pstorerdt_io:
case Hexagon::S2_pstorerdf_io:
return isShiftedUInt<6,3>(Offset);
} // switch

llvm_unreachable("No offset range is defined for this opcode. "
"Please define it in the above switch statement!");
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Early expansion of MUX"));

// Hidden -hexagon-eif switch, on by default. It gates the addition of the
// Hexagon early if-conversion pass in HexagonPassConfig::addInstSelector();
// pass -hexagon-eif=0 to leave the pass out.
static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable early if-conversion"));

static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
cl::Hidden, cl::desc("Generate \"insert\" instructions"));

Expand Down Expand Up @@ -78,6 +81,7 @@ namespace llvm {
FunctionPass *createHexagonCFGOptimizer();
FunctionPass *createHexagonCommonGEP();
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonExpandCondsets();
FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonFixupHwLoops();
Expand Down Expand Up @@ -214,6 +218,8 @@ bool HexagonPassConfig::addInstSelector() {
printAndVerify("After hexagon peephole pass");
if (EnableGenInsert)
addPass(createHexagonGenInsert(), false);
if (EnableEarlyIf)
addPass(createHexagonEarlyIfConversion(), false);
}

return false;
Expand Down
412 changes: 412 additions & 0 deletions llvm/test/CodeGen/Hexagon/early-if-conversion-bug1.ll

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions llvm/test/CodeGen/Hexagon/early-if-phi-i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
; RUN: llc -march=hexagon < %s
; REQUIRES: asserts
; Check that the early if-conversion does not predicate block1 (where the
; join block has a phi node of type i1).
; This module has no target triple, so the Hexagon backend has to be selected
; on the llc command line; otherwise llc compiles for the host's default
; target and the Hexagon early if-conversion pass is never run.

define i1 @foo(i32 %x, i32* %p) {
entry:
  %c = icmp sgt i32 %x, 0
  %c1 = icmp sgt i32 %x, 10
  br i1 %c, label %block2, label %block1
block1:
  store i32 1, i32* %p, align 4
  br label %block2
block2:
  %b = phi i1 [ 0, %entry ], [ %c1, %block1 ]
  ret i1 %b
}
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/Hexagon/early-if-spare.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; RUN: llc -O2 -mcpu=hexagonv5 < %s | FileCheck %s
; Check if the three stores in the loop were predicated.
; CHECK: if{{.*}}memw
; CHECK: if{{.*}}memw
; CHECK: if{{.*}}memw

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Loop that calls @foo and @bar on every iteration and then branches on the
; loop-invariant %cmp2: if.then performs two stores, if.else performs one,
; and both rejoin in for.inc. These are the three stores that the CHECK
; lines expect to be emitted in predicated form.
define void @fred(i32 %n, i32* %bp) nounwind {
entry:
%cmp16 = icmp eq i32 %n, 0
br i1 %cmp16, label %for.end, label %for.body.lr.ph

for.body.lr.ph: ; preds = %entry
%cmp2 = icmp ugt i32 %n, 32
br label %for.body

for.body: ; preds = %for.inc, %for.body.lr.ph
%i.017 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
%call = tail call i32 @foo(i32* %bp) nounwind
%call1 = tail call i32 @bar(i32* %bp) nounwind
br i1 %cmp2, label %if.then, label %if.else

; Side with two stores: bp[i] = %call and bp[i+2] = %call1.
if.then: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, i32* %bp, i32 %i.017
store i32 %call, i32* %arrayidx, align 4, !tbaa !0
%add = add i32 %i.017, 2
%arrayidx3 = getelementptr inbounds i32, i32* %bp, i32 %add
store i32 %call1, i32* %arrayidx3, align 4, !tbaa !0
br label %for.inc

; Side with one store: bp[i] = %call1 | %call.
if.else: ; preds = %for.body
%or = or i32 %call1, %call
%arrayidx4 = getelementptr inbounds i32, i32* %bp, i32 %i.017
store i32 %or, i32* %arrayidx4, align 4, !tbaa !0
br label %for.inc

for.inc: ; preds = %if.then, %if.else
%inc = add i32 %i.017, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit: ; preds = %for.inc
br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
ret void
}

declare i32 @foo(i32*) nounwind

declare i32 @bar(i32*) nounwind

!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/Hexagon/early-if.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; RUN: llc -O2 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; Rely on the comments generated by llc. Check that "if.then" was predicated.
; CHECK: while.body13
; CHECK: if{{.*}}memd
; CHECK: while.end

%struct.1 = type { i32, i32 }
%struct.2 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %struct.1], [5 x i32] }

@A1 = global i64 zeroinitializer
@A2 = global i64 zeroinitializer
@B1 = global i32 zeroinitializer
@B2 = global i32 zeroinitializer
@C1 = global i8 zeroinitializer

declare i32 @llvm.hexagon.S2.cl0(i32) nounwind readnone
declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) nounwind readnone
declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) nounwind readnone
declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) nounwind readnone
declare i64 @llvm.hexagon.A2.vaddws(i64, i64) nounwind readnone
declare i64 @llvm.hexagon.A2.vsubws(i64, i64) nounwind readnone
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) nounwind readnone

; Two nested loops. The inner loop (while.body13 -> if.end) contains a
; triangle: while.body13 branches either to if.then or directly to if.end.
; if.then holds a single i64 store, which the CHECK lines expect to appear
; as a predicated memd between the while.body13 and while.end block comments.
define void @foo(i32 %n, i64* %ptr) nounwind {
entry:
br label %while.body

; Outer loop header: %count counts iterations up to %n, %idx is advanced in
; while.end.
while.body:
%count = phi i32 [ 0, %entry ], [ %next, %while.end ]
%idx = phi i32 [ 0, %entry ], [ %15, %while.end ]
%0 = load i32, i32* @B1, align 4
%1 = load i32, i32* @B2, align 8
%2 = and i32 %1, %0
br label %while.body13

; Inner loop header and head of the if-conversion candidate.
while.body13: ; preds = %while.body, %if.end
%3 = phi i64 [ %13, %if.end ], [ 0, %while.body ]
%4 = phi i64 [ %14, %if.end ], [ 0, %while.body ]
%m = phi i32 [ %6, %if.end ], [ %2, %while.body ]
%5 = tail call i32 @llvm.hexagon.S2.cl0(i32 %m)
%6 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %m, i32 %5)
%cgep85 = getelementptr [10 x %struct.2], [10 x %struct.2]* inttoptr (i32 -121502345 to [10 x %struct.2]*), i32 0, i32 %idx
%cgep90 = getelementptr %struct.2, %struct.2* %cgep85, i32 0, i32 12, i32 %5
%7 = load i32, i32* %cgep90, align 4
%8 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %7, i32 %7)
%cgep91 = getelementptr %struct.2, %struct.2* %cgep85, i32 0, i32 13, i32 %5
%9 = load i32, i32* %cgep91, align 4
%10 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %8, i32 %9, i32 %9)
%11 = load i8, i8* @C1, align 1
%and24 = and i8 %11, 1
%cmp = icmp eq i8 %and24, 0
br i1 %cmp, label %if.then, label %if.end

; Conditional side of the triangle: one call and the i64 store to %ptr.
if.then: ; preds = %while.body13
%12 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %3, i64 %10)
store i64 %12, i64* %ptr, align 8
br label %if.end

; Join block: %13 merges the value from if.then with the unmodified %3.
if.end: ; preds = %if.then, %while.body13
%13 = phi i64 [ %12, %if.then ], [ %3, %while.body13 ]
%14 = tail call i64 @llvm.hexagon.A2.vsubws(i64 %4, i64 %10)
%tobool12 = icmp eq i32 %6, 0
br i1 %tobool12, label %while.end, label %while.body13

; Outer loop latch.
while.end:
%add40 = add i32 %idx, 1
%15 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %add40, i32 10) nounwind
%next = add i32 %count, 1
%cc = icmp eq i32 %next, %n
br i1 %cc, label %end, label %while.body

end:
store i64 %10, i64* @A2, align 8
ret void
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Hexagon/ifcvt-edge-weight.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 -print-machineinstrs=if-converter %s -o /dev/null 2>&1 | FileCheck %s
; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-eif=0 -print-machineinstrs=if-converter %s -o /dev/null 2>&1 | FileCheck %s
; Check that the edge weights are updated correctly after if-conversion.

; CHECK: BB#3:
Expand Down