Skip to content

Commit

Permalink
[RISCV] Add strictfp support for compares.
Browse files Browse the repository at this point in the history
This adds support for STRICT_FSETCC (quiet) and STRICT_FSETCCS (signaling).

FEQ maps well to STRICT_FSETCC oeq.
FLT/FLE map well to STRICT_FSETCCS olt/ole.

Others require commuting operands or multiple instructions.

STRICT_FSETCC olt/ole/ogt/oge/ult/ule/ugt/uge use FLT/FLE,
but we need to save/restore FFLAGS around them to avoid spurious
exceptions. I've implemented pseudo instructions with a
CustomInserter to insert the save/restore CSR instructions.
Unfortunately, this doesn't honor exceptions for signaling NaNs,
but I'm not sure if signaling NaNs are really supported by the
constrained intrinsics.

STRICT_FSETCC one and ueq expand to a pair of FLT instructions
with a save/restore of fflags around each. This could be improved
in the future.

There may be some opportunities to generate better code for strict
comparisons mixed with nonans fast math flags. I've left FIXMEs in
the .td files for that.

Co-Authored-by: ShihPo Hung <shihpo.hung@sifive.com>

Reviewed By: arcbbb

Differential Revision: https://reviews.llvm.org/D116694
  • Loading branch information
topperc committed Jan 12, 2022
1 parent 248d55a commit 63b17eb
Show file tree
Hide file tree
Showing 10 changed files with 3,277 additions and 24 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Expand Up @@ -3593,9 +3593,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
if (Tmp3.getNode())
Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
Tmp1, Tmp2, Tmp3, Node->getFlags());
if (Tmp3.getNode()) {
if (IsStrict) {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
{Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags());
Chain = Tmp1.getValue(1);
} else {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Node->getFlags());
}
}

// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
Expand Down
54 changes: 54 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -343,6 +343,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
Expand Down Expand Up @@ -393,6 +395,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
Expand Down Expand Up @@ -426,6 +430,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
Expand Down Expand Up @@ -8070,6 +8076,42 @@ static bool isSelectPseudo(MachineInstr &MI) {
}
}

/// Expand a quiet floating-point compare pseudo into real instructions.
///
/// The emitted sequence, inserted immediately before \p MI, is:
///   1. read FFLAGS into a fresh GPR virtual register,
///   2. \p RelOpcode on the pseudo's two sources, writing the pseudo's result,
///   3. write the saved value back to FFLAGS,
///   4. \p EqOpcode on the same sources with X0 as destination.
/// The pseudo itself is then erased.
///
/// \param MI         The pseudo; operand 0 is the destination, operands 1 and
///                   2 are the compare sources.
/// \param BB         The block containing \p MI.
/// \param RelOpcode  Opcode of the relational compare to emit.
/// \param EqOpcode   Opcode of the equality compare emitted last.
/// \param Subtarget  Not referenced by this function.
/// \returns \p BB.
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  auto *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  const DebugLoc DL = MI.getDebugLoc();

  const auto &DstOp = MI.getOperand(0);
  const auto &LHSOp = MI.getOperand(1);
  const auto &RHSOp = MI.getOperand(2);
  const Register ResultReg = DstOp.getReg();
  const Register LHSReg = LHSOp.getReg();
  const Register RHSReg = RHSOp.getReg();

  // Every instruction built from the pseudo inherits its nofpexcept marker.
  const bool IsNoFPExcept = MI.getFlag(MachineInstr::MIFlag::NoFPExcept);

  // Step 1: stash the current FFLAGS value.
  const Register FFlagsCopy = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), FFlagsCopy);

  // Step 2: the relational compare that produces the result. The sources are
  // not marked killed here because the trailing equality compare reads them
  // again.
  auto RelCmp = BuildMI(*BB, MI, DL, TII.get(RelOpcode), ResultReg)
                    .addReg(LHSReg)
                    .addReg(RHSReg);
  if (IsNoFPExcept)
    RelCmp->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Step 3: put the stashed FFLAGS value back; this is its last use.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(FFlagsCopy, RegState::Kill);

  // Step 4: a dummy equality compare whose result goes to X0, issued so that
  // signaling NaNs still raise an exception. It carries over whatever kill
  // state the pseudo had on its source operands.
  auto EqCmp = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                   .addReg(LHSReg, getKillRegState(LHSOp.isKill()))
                   .addReg(RHSReg, getKillRegState(RHSOp.isKill()));
  if (IsNoFPExcept)
    EqCmp->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // The pseudo has been fully replaced.
  MI.eraseFromParent();
  return BB;
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
MachineBasicBlock *BB,
const RISCVSubtarget &Subtarget) {
Expand Down Expand Up @@ -8211,6 +8253,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
return emitSplitF64Pseudo(MI, BB);
case RISCV::PseudoQuietFLE_H:
return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
case RISCV::PseudoQuietFLT_H:
return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
case RISCV::PseudoQuietFLE_S:
return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
case RISCV::PseudoQuietFLT_S:
return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
case RISCV::PseudoQuietFLE_D:
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
case RISCV::PseudoQuietFLT_D:
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
}
}

Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrFormats.td
Expand Up @@ -206,6 +206,13 @@ class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string
let isCodeGenOnly = 1;
}

// Pseudo instruction for a quiet floating-point compare: produces a GPR result
// ($rd) from two source registers of class Ty ($rs1, $rs2). It carries no
// selection pattern of its own (empty pattern list).
// It is marked as having side effects and as neither loading nor storing.
// NOTE(review): hasSideEffects is presumably set because the expansion reads
// and writes FFLAGS (see emitQuietFCMP in RISCVISelLowering.cpp) - confirm.
class PseudoQuietFCMP<RegisterClass Ty>
: Pseudo<(outs GPR:$rd), (ins Ty:$rs1, Ty:$rs2), []> {
let hasSideEffects = 1;
let mayLoad = 0;
let mayStore = 0;
}

// Pseudo load instructions.
class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
: Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Expand Up @@ -1369,6 +1369,10 @@ def ReadFRM : ReadSysReg<SysRegFRM, [FRM]>;
def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;

// Read/write accessors for the FFLAGS system register, both marked as having
// side effects. emitQuietFCMP (RISCVISelLowering.cpp) uses them to save FFLAGS
// before a relational compare and restore it afterwards.
let hasSideEffects = true in {
def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
def WriteFFLAGS : WriteSysReg<SysRegFFLAGS, [FFLAGS]>;
}
/// Other pseudo-instructions

// Pessimistically assume the stack pointer will be clobbered
Expand Down
39 changes: 32 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfoD.td
Expand Up @@ -158,6 +158,10 @@ def : InstAlias<"fge.d $rd, $rs, $rt",

def PseudoFLD : PseudoFloatLoad<"fld", FPR64>;
def PseudoFSD : PseudoStore<"fsd", FPR64>;
// Quiet FLE/FLT pseudos for FPR64 operands. They are expanded by
// RISCVTargetLowering::EmitInstrWithCustomInserter, which hands them to
// emitQuietFCMP with FLE_D/FLT_D as the relational opcode and FEQ_D as the
// equality opcode.
let usesCustomInserter = 1 in {
def PseudoQuietFLE_D : PseudoQuietFCMP<FPR64>;
def PseudoQuietFLT_D : PseudoQuietFCMP<FPR64>;
}
} // Predicates = [HasStdExtD]

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -222,13 +226,34 @@ def : PatFpr64Fpr64<fminnum, FMIN_D>;
def : PatFpr64Fpr64<fmaxnum, FMAX_D>;

/// Setcc

def : PatFpr64Fpr64<seteq, FEQ_D>;
def : PatFpr64Fpr64<setoeq, FEQ_D>;
def : PatFpr64Fpr64<setlt, FLT_D>;
def : PatFpr64Fpr64<setolt, FLT_D>;
def : PatFpr64Fpr64<setle, FLE_D>;
def : PatFpr64Fpr64<setole, FLE_D>;
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those?

// Quiet (non-signaling) compares.
// Equality selects FEQ_D directly. Strict quiet less-than / less-or-equal
// select the PseudoQuietFLT_D / PseudoQuietFLE_D pseudos, which the custom
// inserter expands into FLT_D / FLE_D wrapped in an FFLAGS save/restore.
def : PatSetCC<FPR64, any_fsetcc, SETEQ, FEQ_D>;
def : PatSetCC<FPR64, any_fsetcc, SETOEQ, FEQ_D>;
def : PatSetCC<FPR64, strict_fsetcc, SETLT, PseudoQuietFLT_D>;
def : PatSetCC<FPR64, strict_fsetcc, SETOLT, PseudoQuietFLT_D>;
def : PatSetCC<FPR64, strict_fsetcc, SETLE, PseudoQuietFLE_D>;
def : PatSetCC<FPR64, strict_fsetcc, SETOLE, PseudoQuietFLE_D>;

// Signaling equality: there is no single instruction selected for it here, so
// it is formed as (rs1 <= rs2) AND (rs2 <= rs1) using two FLE_D.
def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETEQ),
(AND (FLE_D $rs1, $rs2),
(FLE_D $rs2, $rs1))>;
def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETOEQ),
(AND (FLE_D $rs1, $rs2),
(FLE_D $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETEQ),
(FLE_D $rs1, $rs1)>;
def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETOEQ),
(FLE_D $rs1, $rs1)>;

// Signaling less-than / less-or-equal select FLT_D / FLE_D directly.
def : PatSetCC<FPR64, any_fsetccs, SETLT, FLT_D>;
def : PatSetCC<FPR64, any_fsetccs, SETOLT, FLT_D>;
def : PatSetCC<FPR64, any_fsetccs, SETLE, FLE_D>;
def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D>;

def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>;

Expand Down
42 changes: 35 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfoF.td
Expand Up @@ -309,6 +309,10 @@ def : MnemonicAlias<"fmv.x.s", "fmv.x.w">;

def PseudoFLW : PseudoFloatLoad<"flw", FPR32>;
def PseudoFSW : PseudoStore<"fsw", FPR32>;
// Quiet FLE/FLT pseudos for FPR32 operands. They are expanded by
// RISCVTargetLowering::EmitInstrWithCustomInserter, which hands them to
// emitQuietFCMP with FLE_S/FLT_S as the relational opcode and FEQ_S as the
// equality opcode.
let usesCustomInserter = 1 in {
def PseudoQuietFLE_S : PseudoQuietFCMP<FPR32>;
def PseudoQuietFLT_S : PseudoQuietFCMP<FPR32>;
}
} // Predicates = [HasStdExtF]

//===----------------------------------------------------------------------===//
Expand All @@ -319,6 +323,9 @@ def PseudoFSW : PseudoStore<"fsw", FPR32>;
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;

/// Generic pattern classes
// Selection pattern for a three-operand compare node: matches
// (OpNode rs1, rs2, Cond) with both sources in register class Ty and selects
// (Inst rs1, rs2). The condition code is fixed by the pattern and is not passed
// to the selected instruction.
class PatSetCC<RegisterClass Ty, SDPatternOperator OpNode, CondCode Cond, RVInst Inst>
: Pat<(OpNode Ty:$rs1, Ty:$rs2, Cond), (Inst $rs1, $rs2)>;

class PatFpr32Fpr32<SDPatternOperator OpNode, RVInstR Inst>
: Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2)>;

Expand Down Expand Up @@ -373,13 +380,34 @@ def : PatFpr32Fpr32<fminnum, FMIN_S>;
def : PatFpr32Fpr32<fmaxnum, FMAX_S>;

/// Setcc

def : PatFpr32Fpr32<seteq, FEQ_S>;
def : PatFpr32Fpr32<setoeq, FEQ_S>;
def : PatFpr32Fpr32<setlt, FLT_S>;
def : PatFpr32Fpr32<setolt, FLT_S>;
def : PatFpr32Fpr32<setle, FLE_S>;
def : PatFpr32Fpr32<setole, FLE_S>;
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those?

// Quiet (non-signaling) compares.
// Equality selects FEQ_S directly. Strict quiet less-than / less-or-equal
// select the PseudoQuietFLT_S / PseudoQuietFLE_S pseudos, which the custom
// inserter expands into FLT_S / FLE_S wrapped in an FFLAGS save/restore.
def : PatSetCC<FPR32, any_fsetcc, SETEQ, FEQ_S>;
def : PatSetCC<FPR32, any_fsetcc, SETOEQ, FEQ_S>;
def : PatSetCC<FPR32, strict_fsetcc, SETLT, PseudoQuietFLT_S>;
def : PatSetCC<FPR32, strict_fsetcc, SETOLT, PseudoQuietFLT_S>;
def : PatSetCC<FPR32, strict_fsetcc, SETLE, PseudoQuietFLE_S>;
def : PatSetCC<FPR32, strict_fsetcc, SETOLE, PseudoQuietFLE_S>;

// Signaling equality: there is no single instruction selected for it here, so
// it is formed as (rs1 <= rs2) AND (rs2 <= rs1) using two FLE_S.
def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETEQ),
(AND (FLE_S $rs1, $rs2),
(FLE_S $rs2, $rs1))>;
def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETOEQ),
(AND (FLE_S $rs1, $rs2),
(FLE_S $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETEQ),
(FLE_S $rs1, $rs1)>;
def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETOEQ),
(FLE_S $rs1, $rs1)>;

// Signaling less-than / less-or-equal select FLT_S / FLE_S directly.
def : PatSetCC<FPR32, any_fsetccs, SETLT, FLT_S>;
def : PatSetCC<FPR32, any_fsetccs, SETOLT, FLT_S>;
def : PatSetCC<FPR32, any_fsetccs, SETLE, FLE_S>;
def : PatSetCC<FPR32, any_fsetccs, SETOLE, FLE_S>;

def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>;

Expand Down
39 changes: 32 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
Expand Up @@ -182,6 +182,10 @@ def : InstAlias<"fge.h $rd, $rs, $rt",
let Predicates = [HasStdExtZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
// Quiet FLE/FLT pseudos for FPR16 operands. They are expanded by
// RISCVTargetLowering::EmitInstrWithCustomInserter, which hands them to
// emitQuietFCMP with FLE_H/FLT_H as the relational opcode and FEQ_H as the
// equality opcode.
let usesCustomInserter = 1 in {
def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>;
def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
}
} // Predicates = [HasStdExtZfhmin]

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -246,13 +250,34 @@ def : PatFpr16Fpr16<fminnum, FMIN_H>;
def : PatFpr16Fpr16<fmaxnum, FMAX_H>;

/// Setcc

def : PatFpr16Fpr16<seteq, FEQ_H>;
def : PatFpr16Fpr16<setoeq, FEQ_H>;
def : PatFpr16Fpr16<setlt, FLT_H>;
def : PatFpr16Fpr16<setolt, FLT_H>;
def : PatFpr16Fpr16<setle, FLE_H>;
def : PatFpr16Fpr16<setole, FLE_H>;
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those?

// Quiet (non-signaling) compares.
// Equality selects FEQ_H directly. Strict quiet less-than / less-or-equal
// select the PseudoQuietFLT_H / PseudoQuietFLE_H pseudos, which the custom
// inserter expands into FLT_H / FLE_H wrapped in an FFLAGS save/restore.
def : PatSetCC<FPR16, any_fsetcc, SETEQ, FEQ_H>;
def : PatSetCC<FPR16, any_fsetcc, SETOEQ, FEQ_H>;
def : PatSetCC<FPR16, strict_fsetcc, SETLT, PseudoQuietFLT_H>;
def : PatSetCC<FPR16, strict_fsetcc, SETOLT, PseudoQuietFLT_H>;
def : PatSetCC<FPR16, strict_fsetcc, SETLE, PseudoQuietFLE_H>;
def : PatSetCC<FPR16, strict_fsetcc, SETOLE, PseudoQuietFLE_H>;

// Signaling equality: there is no single instruction selected for it here, so
// it is formed as (rs1 <= rs2) AND (rs2 <= rs1) using two FLE_H.
def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETEQ),
(AND (FLE_H $rs1, $rs2),
(FLE_H $rs2, $rs1))>;
def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETOEQ),
(AND (FLE_H $rs1, $rs2),
(FLE_H $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETEQ),
(FLE_H $rs1, $rs1)>;
def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETOEQ),
(FLE_H $rs1, $rs1)>;

// Signaling less-than / less-or-equal select FLT_H / FLE_H directly.
def : PatSetCC<FPR16, any_fsetccs, SETLT, FLT_H>;
def : PatSetCC<FPR16, any_fsetccs, SETOLT, FLT_H>;
def : PatSetCC<FPR16, any_fsetccs, SETLE, FLE_H>;
def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>;

def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
Expand Down

0 comments on commit 63b17eb

Please sign in to comment.