Skip to content

Commit

Permalink
[AArch64] Split lowerVectorFCMP combine
Browse files Browse the repository at this point in the history
It's the only combine (AFAIK) that didn't use an apply function.
There is no reason for it to mutate instructions in the matcher, so split it up.

Reviewed By: aemerson, arsenm

Differential Revision: https://reviews.llvm.org/D154947
  • Loading branch information
Pierre-vh committed Jul 12, 2023
1 parent 227012c commit af67b67
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 11 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
def lower_vector_fcmp : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_FCMP):$root,
[{ return lowerVectorFCMP(*${root}, MRI, B); }]),
(apply [{}])>;
[{ return matchLowerVectorFCMP(*${root}, MRI, B); }]),
(apply [{ applyLowerVectorFCMP(*${root}, MRI, B); }])>;

def form_truncstore_matchdata : GIDefMatchData<"Register">;
def form_truncstore : GICombineRule<
Expand Down
35 changes: 26 additions & 9 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,29 +949,45 @@ getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIB) {
/// Match a vector G_FCMP \p MI that can be lowered to an AArch64-specific
/// pseudo. This is the match half of the lower_vector_fcmp combine; it must
/// not mutate anything — the rewrite happens in applyLowerVectorFCMP.
///
/// \returns true iff \p MI is a vector FCMP on a NEON-capable subtarget
/// whose element size is a supported FP size (16 requires full FP16, 32, 64).
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  // Only vector compares are lowered here, and the pseudos require NEON.
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;

  // The scalar element type must be a supported FP compare width:
  // f16 (only when the subtarget has full FP16), f32 or f64.
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}

/// Lower a vector G_FCMP \p MI, previously accepted by matchLowerVectorFCMP,
/// into an AArch64-specific pseudo.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIB) {
assert(MI.getOpcode() == TargetOpcode::G_FCMP);
const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

const auto &CmpMI = cast<GFCmp>(MI);

Register Dst = CmpMI.getReg(0);
CmpInst::Predicate Pred = CmpMI.getCond();
Register LHS = CmpMI.getLHSReg();
Register RHS = CmpMI.getRHSReg();

LLT DstTy = MRI.getType(Dst);

auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

// Compares against 0 have special target-specific pseudos.
bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;


bool Invert = false;
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
Expand All @@ -984,10 +1000,12 @@ bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
} else
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

// Build the replacement sequence at the location of the original G_FCMP.
MIB.setInstrAndDebugLoc(MI);

const bool NoNans =
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
Register CmpRes;
if (CC2 == AArch64CC::AL)
Expand All @@ -1002,7 +1020,6 @@ bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
MRI.replaceRegWith(Dst, CmpRes);
MI.eraseFromParent();
return true;
}

bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
Expand Down

0 comments on commit af67b67

Please sign in to comment.