Skip to content

Commit

Permalink
[RISCV] Add support for predicating AND/OR/XOR/ADD/SUB with short-for…
Browse files Browse the repository at this point in the history
…ward-branch-opt.

sifive-7-series can predicate ALU instructions in the shadow of a
branch, not just move instructions.

This patch implements analyzeSelect/optimizeSelect to predicate
these operations. This is based on ARM's implementation which can
predicate using flags and condition codes.

I've restricted it to just the instructions we have test cases for,
but it can be extended in the future.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D140053
  • Loading branch information
topperc committed Dec 17, 2022
1 parent c1266ca commit da7415a
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 58 deletions.
34 changes: 29 additions & 5 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
Expand Up @@ -83,6 +83,13 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCADD:
case RISCV::PseudoCCSUB:
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
Expand Down Expand Up @@ -114,7 +121,6 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");

MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
Expand All @@ -141,10 +147,28 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
Register DestReg = MI.getOperand(0).getReg();
assert(MI.getOperand(4).getReg() == DestReg);

// Add MV.
BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
.add(MI.getOperand(5))
.addImm(0);
if (MI.getOpcode() == RISCV::PseudoCCMOVGPR) {
// Add MV.
BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
.add(MI.getOperand(5))
.addImm(0);
} else {
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case RISCV::PseudoCCADD: NewOpc = RISCV::ADD; break;
case RISCV::PseudoCCSUB: NewOpc = RISCV::SUB; break;
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
case RISCV::PseudoCCADDW: NewOpc = RISCV::ADDW; break;
case RISCV::PseudoCCSUBW: NewOpc = RISCV::SUBW; break;
}
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
.add(MI.getOperand(6));
}

TrueBB->addSuccessor(MergeBB);

Expand Down
146 changes: 146 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Expand Up @@ -1045,6 +1045,152 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}

/// Map an ALU opcode to the predicated pseudo instruction that implements it.
///
/// \param Opcode  Opcode of the instruction being considered for predication.
/// \returns the matching RISCV::PseudoCC* opcode, or
///          RISCV::INSTRUCTION_LIST_END when \p Opcode has no predicated
///          pseudo instruction.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    // No predicated form exists for this opcode.
    return RISCV::INSTRUCTION_LIST_END;

  case RISCV::ADD:
    return RISCV::PseudoCCADD;
  case RISCV::SUB:
    return RISCV::PseudoCCSUB;
  case RISCV::AND:
    return RISCV::PseudoCCAND;
  case RISCV::OR:
    return RISCV::PseudoCCOR;
  case RISCV::XOR:
    return RISCV::PseudoCCXOR;

  case RISCV::ADDW:
    return RISCV::PseudoCCADDW;
  case RISCV::SUBW:
    return RISCV::PseudoCCSUBW;
  }
}

/// Look up the instruction that defines \p Reg and decide whether it may be
/// merged into a CCMOV as a predicated operation.
///
/// \param Reg  Register read by the select; must be virtual with exactly one
///             non-debug use to qualify.
/// \param MRI  Register info used to find the definition and its uses.
/// \param TII  Not referenced by this function.
/// \returns the defining instruction when folding is allowed, otherwise
///          nullptr.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  // Only a virtual register with a single non-debug use is a candidate.
  if (!Reg.isVirtual() || !MRI.hasOneNonDBGUse(Reg))
    return nullptr;

  MachineInstr *DefMI = MRI.getVRegDef(Reg);
  if (!DefMI)
    return nullptr;

  // The defining opcode needs a predicated pseudo to fold into.
  if (getPredicatedOpcode(DefMI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;

  // Inspect every operand after operand 0: no further defs, no tied operands
  // and no non-constant physical registers are tolerated.
  const unsigned NumOps = DefMI->getNumOperands();
  for (unsigned OpIdx = 1; OpIdx != NumOps; ++OpIdx) {
    const MachineOperand &Op = DefMI->getOperand(OpIdx);

    // Frame index, constant pool and jump table operands are rejected; PEI
    // can't handle the predicated pseudos.
    if (Op.isFI() || Op.isCPI() || Op.isJTI())
      return nullptr;

    // Remaining non-register operands need no further checks.
    if (!Op.isReg())
      continue;

    // A tied operand would conflict with predication, and an additional def
    // cannot be folded.
    if (Op.isTied() || Op.isDef())
      return nullptr;

    // Physical registers are acceptable only when they are constant.
    if (Register::isPhysicalRegister(Op.getReg()) &&
        !MRI.isConstantPhysReg(Op.getReg()))
      return nullptr;
  }

  // Passed as true so that isSafeToMove does not permit moving across stores.
  bool NoMoveAcrossStores = true;
  return DefMI->isSafeToMove(/* AliasAnalysis = */ nullptr, NoMoveAcrossStores)
             ? DefMI
             : nullptr;
}

/// Describe the select pseudo \p MI for the caller.
///
/// \param MI           Must be a PseudoCCMOVGPR (asserted).
/// \param Cond         Receives copies of operands 1-3: compare LHS, compare
///                     RHS and the condition code, in that order.
/// \param TrueOp       Set to 5, the index of the true-value operand.
/// \param FalseOp      Set to 4, the index of the false-value operand.
/// \param Optimizable  Set to whether the subtarget has short forward branch
///                     optimization, since folding is only possible then.
/// \returns false in all cases.
bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");

  // CCMOV operand layout:
  //   0: Def.
  //   1: LHS of compare.
  //   2: RHS of compare.
  //   3: Condition code.
  //   4: False use.
  //   5: True use.
  TrueOp = 5;
  FalseOp = 4;

  // Operands 1 through 3 together form the condition.
  for (unsigned CondIdx = 1; CondIdx <= 3; ++CondIdx)
    Cond.push_back(MI.getOperand(CondIdx));

  // Folding is only available with short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

/// Fold the instruction defining one of the select's value operands into the
/// select, producing a single predicated pseudo (see getPredicatedOpcode).
///
/// \param MI           The select; must be a PseudoCCMOVGPR (asserted).
/// \param SeenMIs      Updated in place: the new instruction is inserted and
///                     the folded defining instruction is removed.
/// \param PreferFalse  Not referenced by this implementation.
/// \returns the newly built predicated instruction, or nullptr when the
///          subtarget lacks short forward branch opt, when neither value
///          operand has a foldable definition, or when the destination
///          register class cannot be constrained. On success the defining
///          instruction is erased here; MI itself is left for the caller.
MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool PreferFalse) const {
assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
"Unknown select instruction");
if (!STI.hasShortForwardBranchOpt())
return nullptr;

MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
// Try the true operand (5) first. If its definition cannot be folded, fall
// back to the false operand (4) and remember that the condition must be
// inverted.
MachineInstr *DefMI =
canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
if (!DefMI)
return nullptr;

// Find new register class to use.
// FalseReg is the value operand that was NOT folded; it is copied by value
// because it is added to the new instruction below.
MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;

unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

// Create a new predicated version of DefMI.
// It is inserted immediately before MI and defines MI's destination register.
MachineInstrBuilder NewMI =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

// Copy the condition portion.
NewMI.add(MI.getOperand(1));
NewMI.add(MI.getOperand(2));

// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
if (Invert)
CC = RISCVCC::getOppositeBranchCondition(CC);
NewMI.addImm(CC);

// Copy the false register.
NewMI.add(FalseReg);

// Copy all the DefMI operands.
// Operand 0 is skipped; the new instruction already has DestReg as its def.
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
NewMI.add(DefMI->getOperand(i));

// Update SeenMIs set: register newly created MI and erase removed DefMI.
SeenMIs.insert(NewMI);
SeenMIs.erase(DefMI);

// If MI is inside a loop, and DefMI is outside the loop, then kill flags on
// DefMI would be invalid when transferred inside the loop. Checking for a
// loop is expensive, but at least remove kill flags if they are in different
// BBs.
if (DefMI->getParent() != MI.getParent())
NewMI->clearKillInfo();

// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
}

unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MI.isMetaInstruction())
return 0;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Expand Up @@ -112,6 +112,14 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;

// Describe a PseudoCCMOVGPR select: fills Cond with its compare LHS, compare
// RHS and condition code operands, sets TrueOp/FalseOp to the operand indices
// of the true and false values, and sets Optimizable to whether the subtarget
// has short forward branch optimization.
bool analyzeSelect(const MachineInstr &MI,
SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp,
unsigned &FalseOp, bool &Optimizable) const override;

// Try to fold the instruction defining one of a PseudoCCMOVGPR's value
// operands into a single predicated pseudo. Returns the new instruction, or
// nullptr when no fold is possible. SeenMIs is updated to contain the new
// instruction instead of the folded one. The trailing bool parameter is
// unnamed here and is not used by the implementation.
MachineInstr *optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool) const override;

bool isAsCheapAsAMove(const MachineInstr &MI) const override;

std::optional<DestSourcePair>
Expand Down
40 changes: 39 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfo.td
Expand Up @@ -1336,7 +1336,7 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
node:$falsev), [{}],
IntCCtoRISCVCC>;

let Predicates = [HasShortForwardBranchOpt],
let Predicates = [HasShortForwardBranchOpt], isSelect = 1,
Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
// This instruction moves $truev to $dst when the condition is true. It will
// be expanded to control flow in RISCVExpandPseudoInsts.
Expand All @@ -1350,6 +1350,44 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
}

// Conditional binops: each pseudo updates $dst to (op $rs1, $rs2) when the
// condition is true and yields $falsev otherwise. Selected by optimizeSelect.
// TODO: Can we use DefaultOperands on the regular binop to accomplish this more
// like how ARM does predication?

// Operand list and scheduling info shared by every conditional binop pseudo.
class PseudoCCBinOp
    : Pseudo<(outs GPR:$dst),
             (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
              GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
      Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;

let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8,
    Constraints = "$dst = $falsev" in {
def PseudoCCADD : PseudoCCBinOp;
def PseudoCCSUB : PseudoCCBinOp;
def PseudoCCAND : PseudoCCBinOp;
def PseudoCCOR  : PseudoCCBinOp;
def PseudoCCXOR : PseudoCCBinOp;

// RV64I instructions
def PseudoCCADDW : PseudoCCBinOp;
def PseudoCCSUBW : PseudoCCBinOp;
}

multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
Expand Down

0 comments on commit da7415a

Please sign in to comment.