Skip to content

Commit

Permalink
[SDAG] Preserve unpredictable metadata, teach X86CmovConversion to re…
Browse files Browse the repository at this point in the history
…spect this metadata

Sometimes a developer would like to have more control over cmov vs. branch selection. LLVM IR has `unpredictable` metadata, but it is currently ignored by the X86 backend. Propagate this metadata and avoid the cmov->branch conversion in X86CmovConversion for cmov instructions that carry this metadata.

Example:

```
int MaxIndex(int n, int *a) {
    int t = 0;
    for (int i = 1; i < n; i++) {
        // cmov is converted to branch by X86CmovConversion
        if (a[i] > a[t]) t = i;
    }
    return t;
}

int MaxIndex2(int n, int *a) {
    int t = 0;
    for (int i = 1; i < n; i++) {
        // cmov is preserved
        if (__builtin_unpredictable(a[i] > a[t])) t = i;
    }
    return t;
}
```

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D118118
  • Loading branch information
davidbolvansky committed Jun 1, 2023
1 parent 0a21fb3 commit 09515f2
Show file tree
Hide file tree
Showing 19 changed files with 108 additions and 102 deletions.
81 changes: 40 additions & 41 deletions llvm/include/llvm/CodeGen/MachineInstr.h
Expand Up @@ -80,38 +80,39 @@ class MachineInstr
};

enum MIFlag {
NoFlags = 0,
FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
FrameDestroy = 1 << 1, // Instruction is used as a part of
// function frame destruction code.
BundledPred = 1 << 2, // Instruction has bundled predecessors.
BundledSucc = 1 << 3, // Instruction has bundled successors.
FmNoNans = 1 << 4, // Instruction does not support Fast
// math nan values.
FmNoInfs = 1 << 5, // Instruction does not support Fast
// math infinity values.
FmNsz = 1 << 6, // Instruction is not required to retain
// signed zero values.
FmArcp = 1 << 7, // Instruction supports Fast math
// reciprocal approximations.
FmContract = 1 << 8, // Instruction supports Fast math
// contraction operations like fma.
FmAfn = 1 << 9, // Instruction may map to Fast math
// intrinsic approximation.
FmReassoc = 1 << 10, // Instruction supports Fast math
// reassociation of operand order.
NoUWrap = 1 << 11, // Instruction supports binary operator
// no unsigned wrap.
NoSWrap = 1 << 12, // Instruction supports binary operator
// no signed wrap.
IsExact = 1 << 13, // Instruction supports division is
// known to be exact.
NoFPExcept = 1 << 14, // Instruction does not raise
// floating-point exceptions.
NoMerge = 1 << 15, // Passes that drop source location info
// (e.g. branch folding) should skip
// this instruction.
NoFlags = 0,
FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
FrameDestroy = 1 << 1, // Instruction is used as a part of
// function frame destruction code.
BundledPred = 1 << 2, // Instruction has bundled predecessors.
BundledSucc = 1 << 3, // Instruction has bundled successors.
FmNoNans = 1 << 4, // Instruction does not support Fast
// math nan values.
FmNoInfs = 1 << 5, // Instruction does not support Fast
// math infinity values.
FmNsz = 1 << 6, // Instruction is not required to retain
// signed zero values.
FmArcp = 1 << 7, // Instruction supports Fast math
// reciprocal approximations.
FmContract = 1 << 8, // Instruction supports Fast math
// contraction operations like fma.
FmAfn = 1 << 9, // Instruction may map to Fast math
// intrinsic approximation.
FmReassoc = 1 << 10, // Instruction supports Fast math
// reassociation of operand order.
NoUWrap = 1 << 11, // Instruction supports binary operator
// no unsigned wrap.
NoSWrap = 1 << 12, // Instruction supports binary operator
// no signed wrap.
IsExact = 1 << 13, // Instruction supports division is
// known to be exact.
NoFPExcept = 1 << 14, // Instruction does not raise
// floating-point exceptions.
NoMerge = 1 << 15, // Passes that drop source location info
// (e.g. branch folding) should skip
// this instruction.
Unpredictable = 1 << 16, // Instruction with unpredictable condition.
};

private:
Expand All @@ -120,12 +121,10 @@ class MachineInstr

// Operands are allocated by an ArrayRecycler.
MachineOperand *Operands = nullptr; // Pointer to the first operand.
uint16_t NumOperands = 0; // Number of operands on instruction.

uint16_t Flags = 0; // Various bits of additional
uint32_t Flags = 0; // Various bits of additional
// information about machine
// instruction.

uint16_t NumOperands = 0; // Number of operands on instruction.
uint8_t AsmPrinterFlags = 0; // Various bits of information used by
// the AsmPrinter to emit helpful
// comments. This is *not* semantic
Expand Down Expand Up @@ -357,7 +356,7 @@ class MachineInstr
}

/// Return the MI flags bitvector.
uint16_t getFlags() const {
uint32_t getFlags() const {
return Flags;
}

Expand All @@ -368,7 +367,7 @@ class MachineInstr

/// Set a MI flag.
void setFlag(MIFlag Flag) {
Flags |= (uint16_t)Flag;
Flags |= (uint32_t)Flag;
}

void setFlags(unsigned flags) {
Expand All @@ -379,7 +378,7 @@ class MachineInstr

/// clearFlag - Clear a MI flag.
void clearFlag(MIFlag Flag) {
Flags &= ~((uint16_t)Flag);
Flags &= ~((uint32_t)Flag);
}

/// Return true if MI is in a bundle (but not the first MI in a bundle).
Expand Down Expand Up @@ -1889,9 +1888,9 @@ class MachineInstr
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
uint16_t mergeFlagsWith(const MachineInstr& Other) const;
uint32_t mergeFlagsWith(const MachineInstr& Other) const;

static uint16_t copyFlagsFromInstruction(const Instruction &I);
static uint32_t copyFlagsFromInstruction(const Instruction &I);

/// Copy all flags to MachineInst MIFlags
void copyIRFlags(const Instruction &I);
Expand Down
7 changes: 6 additions & 1 deletion llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Expand Up @@ -395,14 +395,16 @@ struct SDNodeFlags {
// negative "NoFPExcept" flag here (that defaults to true) makes the flag
// intersection logic more straightforward.
bool NoFPExcept : 1;
// Instructions with attached 'unpredictable' metadata on IR level.
bool Unpredictable : 1;

public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
: NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
AllowContract(false), ApproximateFuncs(false),
AllowReassociation(false), NoFPExcept(false) {}
AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {}

/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
Expand All @@ -427,6 +429,7 @@ struct SDNodeFlags {
void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
void setAllowReassociation(bool b) { AllowReassociation = b; }
void setNoFPExcept(bool b) { NoFPExcept = b; }
void setUnpredictable(bool b) { Unpredictable = b; }

// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
Expand All @@ -440,6 +443,7 @@ struct SDNodeFlags {
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
bool hasNoFPExcept() const { return NoFPExcept; }
bool hasUnpredictable() const { return Unpredictable; }

/// Clear any flags in this flag set that aren't also set in Flags. All
/// flags will be cleared if Flags are undefined.
Expand All @@ -455,6 +459,7 @@ struct SDNodeFlags {
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
NoFPExcept &= Flags.NoFPExcept;
Unpredictable &= Flags.Unpredictable;
}
};

Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Expand Up @@ -300,7 +300,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
uint16_t Flags = 0;
uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
Expand All @@ -314,7 +314,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
uint16_t Flags = 0;
uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
Expand Down Expand Up @@ -345,7 +345,7 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
uint16_t Flags = 0;
uint32_t Flags = 0;
if (CI)
Flags = MachineInstr::copyFlagsFromInstruction(*CI);
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
Expand Down Expand Up @@ -1438,7 +1438,7 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));

uint16_t Flags = 0;
uint32_t Flags = 0;
if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*SI);

Expand Down Expand Up @@ -1864,7 +1864,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
if (!Opcode)
return false;

unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags |= MachineInstr::NoFPExcept;

Expand Down Expand Up @@ -2370,7 +2370,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return CLI->lowerCall(MIRBuilder, Info);
}
case Intrinsic::fptrunc_round: {
unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);

// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/MIRParser/MILexer.cpp
Expand Up @@ -214,6 +214,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nsw", MIToken::kw_nsw)
.Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
.Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/MIRParser/MILexer.h
Expand Up @@ -73,6 +73,7 @@ struct MIToken {
kw_nsw,
kw_exact,
kw_nofpexcept,
kw_unpredictable,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/CodeGen/MIRParser/MIParser.cpp
Expand Up @@ -1451,7 +1451,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
Token.is(MIToken::kw_nofpexcept)) {
Token.is(MIToken::kw_nofpexcept) ||
Token.is(MIToken::kw_unpredictable)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
Expand Down Expand Up @@ -1479,6 +1480,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::IsExact;
if (Token.is(MIToken::kw_nofpexcept))
Flags |= MachineInstr::NoFPExcept;
if (Token.is(MIToken::kw_unpredictable))
Flags |= MachineInstr::Unpredictable;

lex();
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/MIRPrinter.cpp
Expand Up @@ -801,6 +801,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nofpexcept ";
if (MI.getFlag(MachineInstr::NoMerge))
OS << "nomerge ";
if (MI.getFlag(MachineInstr::Unpredictable))
OS << "unpredictable ";

OS << TII->getName(MI.getOpcode());
if (I < E)
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/CodeGen/MachineInstr.cpp
Expand Up @@ -534,14 +534,14 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPCSections(MF, MI.getPCSections());
}

uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
// For now, just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}

uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
uint16_t MIFlags = 0;
uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
uint32_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
Expand Down Expand Up @@ -575,6 +575,9 @@ uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::FmReassoc;
}

if (I.getMetadata(LLVMContext::MD_unpredictable))
MIFlags |= MachineInstr::MIFlag::Unpredictable;

return MIFlags;
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
Expand Up @@ -1078,6 +1078,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,

if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);

if (Flags.hasUnpredictable())
MI->setFlag(MachineInstr::MIFlag::Unpredictable);
}

// Emit all of the actual operands of this instruction, adding them to the
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -3381,6 +3381,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);

Flags.setUnpredictable(
cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));

// Min/max matching is only viable if all output VTs are the same.
if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Expand Up @@ -6889,7 +6889,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(

// Set the flags on the inserted instructions to be the merged flags of the
// instructions that we have combined.
uint16_t Flags = Root.getFlags();
uint32_t Flags = Root.getFlags();
if (MUL)
Flags = Root.mergeFlagsWith(*MUL);
for (auto *MI : InsInstrs)
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Expand Up @@ -226,7 +226,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &NewMI2) const {
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
Expand All @@ -239,7 +239,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
}

void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
uint16_t Flags) const {
uint32_t Flags) const {
MI.setFlags(Flags);
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
Expand Down Expand Up @@ -841,7 +841,7 @@ void PPCInstrInfo::reassociateFMA(
}
}

uint16_t IntersectedFlags = 0;
uint32_t IntersectedFlags = 0;
if (IsILPReassociate)
IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
else
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCInstrInfo.h
Expand Up @@ -518,7 +518,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {

// PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
// and clears nuw, nsw, and exact flags.
void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const;
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const;

bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Expand Up @@ -1344,7 +1344,7 @@ void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI2.setFlags(IntersectedFlags);
}
Expand Down Expand Up @@ -1630,7 +1630,7 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,

Register DstReg = Dst.getReg();
unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
auto IntersectedFlags = Root.getFlags() & Prev.getFlags();
uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
DebugLoc MergedLoc =
DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
Expand Up @@ -217,7 +217,7 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF());
// Copy MIFlags from Def to ASSIGN_TYPE instruction. It's required to keep
// the flags after instruction selection.
const uint16_t Flags = Def->getFlags();
const uint32_t Flags = Def->getFlags();
MIB.buildInstr(SPIRV::ASSIGN_TYPE)
.addDef(Reg)
.addUse(NewReg)
Expand Down

0 comments on commit 09515f2

Please sign in to comment.