diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h index ea2c7a5faae38..c2c08c6c84bce 100644 --- a/llvm/include/llvm/CodeGen/MacroFusion.h +++ b/llvm/include/llvm/CodeGen/MacroFusion.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACROFUSION_H #define LLVM_CODEGEN_MACROFUSION_H -#include #include +#include namespace llvm { @@ -29,10 +29,10 @@ class SUnit; /// Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if /// SecondMI may be part of a fused pair at all. -using ShouldSchedulePredTy = std::function; +using MacroFusionPredTy = bool (*)(const TargetInstrInfo &TII, + const TargetSubtargetInfo &STI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI); /// Checks if the number of cluster edges between SU and its predecessors is /// less than FuseLimit @@ -48,15 +48,17 @@ bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, /// Create a DAG scheduling mutation to pair instructions back to back /// for instructions that benefit according to the target-specific -/// shouldScheduleAdjacent predicate function. +/// predicate functions. shouldScheduleAdjacent will be true if any of the +/// provided predicates are true. std::unique_ptr -createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); +createMacroFusionDAGMutation(std::vector Predicates); /// Create a DAG scheduling mutation to pair branch instructions with one /// of their predecessors back to back for instructions that benefit according -/// to the target-specific shouldScheduleAdjacent predicate function. +/// to the target-specific predicate functions. shouldScheduleAdjacent will be +/// true if any of the provided predicates are true. 
std::unique_ptr -createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); +createBranchMacroFusionDAGMutation(std::vector Predicates); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 55ef95c285431..7091776b04a44 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MacroFusion.h" #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/GlobalValue.h" @@ -53,6 +54,22 @@ class TargetRegisterInfo; class TargetSchedModel; class Triple; +//===----------------------------------------------------------------------===// + +/// Used to provide information for macro fusion. +struct MacroFusionEntry { + const char *Name; ///< Name of macro fusion + MacroFusionPredTy Pred; ///< Predicator function of macro fusion + + /// Compare routine for std::lower_bound + bool operator<(StringRef S) const { return StringRef(Name) < S; } + + /// Compare routine for std::is_sorted. + bool operator<(const MacroFusionEntry &Other) const { + return StringRef(Name) < StringRef(Other.Name); + } +}; + //===----------------------------------------------------------------------===// /// /// TargetSubtargetInfo - Generic base class for all target subtargets. All @@ -60,6 +77,9 @@ class Triple; /// be exposed through a TargetSubtargetInfo-derived class. /// class TargetSubtargetInfo : public MCSubtargetInfo { +private: + ArrayRef MacroFusionTable; + protected: // Can only create subclasses... 
TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, ArrayRef PF, @@ -67,7 +87,10 @@ class TargetSubtargetInfo : public MCSubtargetInfo { const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, - const unsigned *OC, const unsigned *FP); + const unsigned *OC, const unsigned *FP, + ArrayRef MF); + + void overrideFusionBits(); public: // AntiDepBreakMode - Type of anti-dependence breaking that should @@ -323,6 +346,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// helps removing redundant copies generated by register allocator when /// handling complex eviction chains. virtual bool enableSpillageCopyElimination() const { return false; } + + /// Get the list of MacroFusion predicates. + virtual std::vector getMacroFusions() const; }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index 98ebe42cfd133..aa187e5cb4006 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -14,6 +14,7 @@ #ifndef LLVM_MC_MCSCHEDULE_H #define LLVM_MC_MCSCHEDULE_H +#include "llvm/ADT/Bitset.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/DataTypes.h" #include @@ -196,6 +197,9 @@ struct MCExtraProcessorInfo { unsigned StoreQueueID; }; +const unsigned MaxMacroFusions = 256; +using MacroFusionBitset = Bitset; + /// Machine model for scheduling, bundling, and heuristics. /// /// The machine model directly provides basic information about the @@ -325,9 +329,14 @@ struct MCSchedModel { const InstrItinerary *InstrItineraries; const MCExtraProcessorInfo *ExtraProcessorInfo; + const MacroFusionBitset *MacroFusionBits; bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; } + const MacroFusionBitset *getMacroFusionBits() const { + return MacroFusionBits; + } + unsigned getProcessorID() const { return ProcID; } /// Does this machine model include instruction-level scheduling. 
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h index f172a799aa333..1ae5134f04719 100644 --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -92,6 +92,8 @@ class MCSubtargetInfo { FeatureBitset FeatureBits; // Feature bits for current CPU + FS std::string FeatureString; // Feature string + MacroFusionBitset FusionBits; // Fusion bits + public: MCSubtargetInfo(const MCSubtargetInfo &) = default; MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, @@ -120,6 +122,15 @@ class MCSubtargetInfo { return FeatureBits[Feature]; } + const MacroFusionBitset &getMacroFusionBits() const { return FusionBits; } + void enableMacroFusion(unsigned MacroFusion) { FusionBits.set(MacroFusion); } + void disableMacroFusion(unsigned MacroFusion) { + FusionBits.reset(MacroFusion); + } + bool hasMacroFusion(unsigned MacroFusion) const { + return FusionBits.test(MacroFusion); + } + protected: /// Initialize the scheduling model and feature bits. /// @@ -295,6 +306,9 @@ class MCSubtargetInfo { /// \return if target want to issue a prefetch in address space \p AS. virtual bool shouldPrefetchAddressSpace(unsigned AS) const; + + /// Enable macro fusion for this subtarget. + virtual bool enableMacroFusion() const { return FusionBits.any(); } }; } // end namespace llvm diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td index 9f2cde9d92305..82c4c7b23a49b 100644 --- a/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -95,6 +95,12 @@ class MCOperandPredicate : MCInstPredicate { // Return true if machine operand at position `Index` is a register operand. class CheckIsRegOperand : MCOperandPredicate; +// Return true if machine operand at position `Index` is a virtual register operand. 
+class CheckIsVRegOperand : MCOperandPredicate; + +// Return true if machine operand at position `Index` is not a virtual register operand. +class CheckIsNotVRegOperand : CheckNot>; + +// Return true if machine operand at position `Index` is an immediate operand. class CheckIsImmOperand : MCOperandPredicate; diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index 949baa5d2105c..b4d056bcc13c2 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -53,6 +53,7 @@ include "llvm/Target/TargetItinerary.td" class Predicate; // Forward def +class Fusion; // DAG operator that interprets the DAG args as Instruction defs. def instrs; @@ -122,6 +123,9 @@ class SchedMachineModel { // using intervals via ResourceSegments (see // llvm/include/llvm/CodeGen/MachineScheduler.h). bit EnableIntervals = false; + + // List of Fusion. + list MacroFusions = []; } def NoSchedModel : SchedMachineModel { @@ -584,3 +588,117 @@ class MemoryQueue { class LoadQueue : MemoryQueue; class StoreQueue : MemoryQueue; + +// The target instruction that FusionPredicate will be evaluated on. +class FusionTarget; +def first : FusionTarget; +def second : FusionTarget; +def both : FusionTarget; + +// Base class of FusionPredicate, etc. The available variables are: +// * const TargetInstrInfo &TII +// * const TargetSubtargetInfo &STI +// * const MachineRegisterInfo &MRI +// * const MachineInstr *FirstMI +// * const MachineInstr &SecondMI +class FusionPredicate { + FusionTarget Target = target; +} +class FirstFusionPredicate: FusionPredicate; +class SecondFusionPredicate: FusionPredicate; +class BothFusionPredicate: FusionPredicate; + +// FusionPredicate with raw code predicate. +class FusionPredicateWithCode : FusionPredicate { + code Predicate = pred; +} + +// FusionPredicate with MCInstPredicate. 
+class FusionPredicateWithMCInstPredicate + : FusionPredicate { + MCInstPredicate Predicate = pred; +} +class FirstFusionPredicateWithMCInstPredicate + : FusionPredicateWithMCInstPredicate; +class SecondFusionPredicateWithMCInstPredicate + : FusionPredicateWithMCInstPredicate; +// The pred will be applied on both firstMI and secondMI. +class BothFusionPredicateWithMCInstPredicate + : FusionPredicateWithMCInstPredicate; + +// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position +// `firstOpIdx` should be the same as the operand of `SecondMI` at position +// `secondOpIdx`. +class TieReg : BothFusionPredicate { + int FirstOpIdx = firstOpIdx; + int SecondOpIdx = secondOpIdx; +} + +// A predicate for wildcard. The generated code will be like: +// ``` +// if (!FirstMI) +// return ReturnValue; +// ``` +class WildcardPred : FirstFusionPredicate { + bit ReturnValue = ret; +} +def WildcardFalse : WildcardPred<0>; +def WildcardTrue : WildcardPred<1>; + +// Indicates that the destination register of `FirstMI` should have one use if +// it is a virtual register. +class OneUsePred : FirstFusionPredicate; +def OneUse : OneUsePred; + +// Handled by MacroFusionPredicatorEmitter backend. 
+// The generated predicator will be like: +// ``` +// bool isNAME(const TargetInstrInfo &TII, +// const TargetSubtargetInfo &STI, +// const MachineInstr *FirstMI, +// const MachineInstr &SecondMI) { +// auto &MRI = SecondMI.getMF()->getRegInfo(); +// /* Predicates */ +// return true; +// } +// ``` +class Fusion predicates> { + string Name = name; + list Predicates = predicates; +} + +// The generated predicator will be like: +// ``` +// bool isNAME(const TargetInstrInfo &TII, +// const TargetSubtargetInfo &STI, +// const MachineInstr *FirstMI, +// const MachineInstr &SecondMI) { +// auto &MRI = SecondMI.getMF()->getRegInfo(); +// /* Prolog */ +// /* Predicate for `SecondMI` */ +// /* Wildcard */ +// /* Predicate for `FirstMI` */ +// /* Check One Use */ +// /* Tie registers */ +// /* Epilog */ +// return true; +// } +// ``` +class SimpleFusion prolog = [], + list epilog = []> + : Fusion, + WildcardTrue, + FirstFusionPredicateWithMCInstPredicate, + SecondFusionPredicateWithMCInstPredicate< + CheckAny<[ + CheckIsVRegOperand<0>, + CheckSameRegOperand<0, 1> + ]>>, + OneUse, + TieReg<0, 1>, + ], + epilog)>; diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp index fa5df68b8abcc..d79e9572050b0 100644 --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -137,19 +137,33 @@ namespace { /// Post-process the DAG to create cluster edges between instrs that may /// be fused by the processor into a single operation. 
class MacroFusion : public ScheduleDAGMutation { - ShouldSchedulePredTy shouldScheduleAdjacent; + std::vector Predicates; bool FuseBlock; bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); public: - MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock) - : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {} + MacroFusion(std::vector Predicates, bool FuseBlock) + : Predicates(std::move(Predicates)), FuseBlock(FuseBlock) {} void apply(ScheduleDAGInstrs *DAGInstrs) override; + + bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &STI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI); }; } // end anonymous namespace +bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &STI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + return llvm::any_of(Predicates, [&](MacroFusionPredTy Predicate) { + return Predicate(TII, STI, FirstMI, SecondMI); + }); +} + void MacroFusion::apply(ScheduleDAGInstrs *DAG) { if (FuseBlock) // For each of the SUnits in the scheduling block, try to fuse the instr in @@ -197,17 +211,15 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) } std::unique_ptr -llvm::createMacroFusionDAGMutation( - ShouldSchedulePredTy shouldScheduleAdjacent) { - if(EnableMacroFusion) - return std::make_unique(shouldScheduleAdjacent, true); +llvm::createMacroFusionDAGMutation(std::vector Predicates) { + if (EnableMacroFusion) + return std::make_unique(std::move(Predicates), true); return nullptr; } -std::unique_ptr -llvm::createBranchMacroFusionDAGMutation( - ShouldSchedulePredTy shouldScheduleAdjacent) { - if(EnableMacroFusion) - return std::make_unique(shouldScheduleAdjacent, false); +std::unique_ptr llvm::createBranchMacroFusionDAGMutation( + std::vector Predicates) { + if (EnableMacroFusion) + return std::make_unique(std::move(Predicates), false); return nullptr; } diff --git 
a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 6c97bc0568bde..9e3f1f4171611 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -14,16 +14,62 @@ using namespace llvm; +static cl::list MFusions("mfusion", cl::CommaSeparated, + cl::desc("Target specific macro fusions"), + cl::value_desc("a1,+a2,-a3,...")); + TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, ArrayRef PF, ArrayRef PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, - const unsigned *FP) - : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {} + const unsigned *FP, ArrayRef MF) + : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP), + MacroFusionTable(MF) { + // assert if MacroFusionTable is not sorted. + assert(llvm::is_sorted(MacroFusionTable)); + overrideFusionBits(); +} TargetSubtargetInfo::~TargetSubtargetInfo() = default; +void TargetSubtargetInfo::overrideFusionBits() { + if (MFusions.getNumOccurrences() != 0) { + for (std::string &MFusion : MFusions) { + char Prefix = MFusion[0]; + bool Disable = Prefix == '-'; + if (Prefix == '+' || Prefix == '-') + MFusion = MFusion.substr(1); + + // MacroFusionTable is sorted. + const auto *Pos = std::lower_bound( + MacroFusionTable.begin(), MacroFusionTable.end(), MFusion, + [](const MacroFusionEntry &LHS, const std::string &RHS) { + int CmpName = StringRef(LHS.Name).compare(RHS); + if (CmpName < 0) + return true; + if (CmpName > 0) + return false; + return false; + }); + + if (Pos == MacroFusionTable.end()) { + errs() << "'" << MFusion + << "' is not a recognized macro fusion for this " + << "target (ignoring it)\n"; + continue; + } + + // The index is the same as the enum value. 
+ unsigned Idx = Pos - MacroFusionTable.begin(); + if (Disable) + disableMacroFusion(Idx); + else + enableMacroFusion(Idx); + } + } +} + bool TargetSubtargetInfo::enableAtomicExpand() const { return true; } @@ -58,3 +104,13 @@ bool TargetSubtargetInfo::useAA() const { } void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { } + +std::vector TargetSubtargetInfo::getMacroFusions() const { + std::vector Fusions; + const MacroFusionBitset &Bits = getMacroFusionBits(); + for (unsigned I = 0; I < MacroFusionTable.size(); I++) + if (Bits[I]) + Fusions.push_back(MacroFusionTable[I].Pred); + + return Fusions; +} diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp index 990a693559a77..19c36cb0e58d9 100644 --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth, 0, 0, nullptr, + nullptr, nullptr}; int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp index 8ee823e0377b7..8ea1aca92e048 100644 --- a/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/llvm/lib/MC/MCSubtargetInfo.cpp @@ -215,6 +215,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU, CPUSchedModel = &getSchedModelForCPU(TuneCPU); else CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); + if (CPUSchedModel->getMacroFusionBits()) + FusionBits = *CPUSchedModel->getMacroFusionBits(); } void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU, diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 05d60872bf51a..8f46f3eabb3ef 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -478,5 +478,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, std::unique_ptr llvm::createAArch64MacroFusionDAGMutation() { - return 
createMacroFusionDAGMutation(shouldScheduleAdjacent); + return createMacroFusionDAGMutation({shouldScheduleAdjacent}); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp index 0cbabf3895a67..b2b11d661523e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp @@ -60,7 +60,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_, namespace llvm { std::unique_ptr createAMDGPUMacroFusionDAGMutation() { - return createMacroFusionDAGMutation(shouldScheduleAdjacent); + return createMacroFusionDAGMutation({shouldScheduleAdjacent}); } } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp index 29c9b9ccf2761..0bddeeef9e9b1 100644 --- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp @@ -142,10 +142,10 @@ namespace { /// be turned into VOPD instructions /// Greedily pairs instruction candidates. O(n^2) algorithm. 
struct VOPDPairingMutation : ScheduleDAGMutation { - ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer + MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer VOPDPairingMutation( - ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer + MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer : shouldScheduleAdjacent(shouldScheduleAdjacent) {} void apply(ScheduleDAGInstrs *DAG) override { diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/llvm/lib/Target/ARM/ARMMacroFusion.cpp index 5aeb7abe92a38..7de117925e464 100644 --- a/llvm/lib/Target/ARM/ARMMacroFusion.cpp +++ b/llvm/lib/Target/ARM/ARMMacroFusion.cpp @@ -63,7 +63,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, } std::unique_ptr createARMMacroFusionDAGMutation() { - return createMacroFusionDAGMutation(shouldScheduleAdjacent); + return createMacroFusionDAGMutation({shouldScheduleAdjacent}); } } // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp index 7ad6ef8c39286..d6a4a5dd5faab 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -287,7 +287,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, namespace llvm { std::unique_ptr createPowerPCMacroFusionDAGMutation() { - return createMacroFusionDAGMutation(shouldScheduleAdjacent); + return createMacroFusionDAGMutation({shouldScheduleAdjacent}); } } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp index 02a8d5c18fe1a..1b82cc8b5b858 100644 --- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp +++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp @@ -65,5 +65,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, } std::unique_ptr llvm::createRISCVMacroFusionDAGMutation() { - return createMacroFusionDAGMutation(shouldScheduleAdjacent); + return 
createMacroFusionDAGMutation({shouldScheduleAdjacent}); } diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index 82667b8cdbdb8..382cc9a71c2a1 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -68,7 +68,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, namespace llvm { std::unique_ptr createX86MacroFusionDAGMutation() { - return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent); + return createBranchMacroFusionDAGMutation({shouldScheduleAdjacent}); } } // end namespace llvm diff --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc index 7de7eabdd1f60..954c7e3f9b8e2 100644 --- a/llvm/unittests/CodeGen/MFCommon.inc +++ b/llvm/unittests/CodeGen/MFCommon.inc @@ -76,7 +76,7 @@ class BogusSubtarget : public TargetSubtargetInfo { public: BogusSubtarget(TargetMachine &TM) : TargetSubtargetInfo(Triple(""), "", "", "", {}, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr), + nullptr, nullptr, nullptr, nullptr, {}), FL(), TL(TM) {} ~BogusSubtarget() override {} diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index 071ea3bc07054..f765cc36d3beb 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -72,6 +72,7 @@ add_tablegen(llvm-tblgen LLVM PredicateExpander.cpp PseudoLoweringEmitter.cpp CompressInstEmitter.cpp + MacroFusionPredicatorEmitter.cpp RegisterBankEmitter.cpp RegisterInfoEmitter.cpp SearchableTableEmitter.cpp diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index 54463da198214..7411683363d29 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -226,6 +226,9 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, // (For per-operand resources mapped to itinerary classes). 
collectProcUnsupportedFeatures(); + // Find MacroFusion records for each processor. + collectMacroFusions(); + // Infer new SchedClasses from SchedVariant. inferSchedClasses(); @@ -1168,6 +1171,12 @@ bool CodeGenSchedModels::hasItineraries() const { return false; } +bool CodeGenSchedModels::hasMacroFusions() const { + return llvm::any_of(ProcModels, [](const CodeGenProcModel &PM) { + return PM.hasMacroFusions(); + }); +} + // Gather the processor itineraries. void CodeGenSchedModels::collectProcItins() { LLVM_DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n"); @@ -1238,6 +1247,12 @@ void CodeGenSchedModels::collectProcUnsupportedFeatures() { ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures")); } +void CodeGenSchedModels::collectMacroFusions() { + for (CodeGenProcModel &ProcModel : ProcModels) + append_range(ProcModel.MacroFusions, + ProcModel.ModelDef->getValueAsListOfDefs("MacroFusions")); +} + /// Infer new classes from existing classes. In the process, this may create new /// SchedWrites from sequences of existing SchedWrites. void CodeGenSchedModels::inferSchedClasses() { diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h index 76ef1e4395307..317558c52fbef 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.h +++ b/llvm/utils/TableGen/CodeGenSchedule.h @@ -238,6 +238,10 @@ struct CodeGenProcModel { // This list is empty if the Processor has no UnsupportedFeatures. RecVec UnsupportedFeaturesDefs; + // List of MacroFusion. + // This list is empty if the Processor has no MacroFusion. + RecVec MacroFusions; + // All read/write resources associated with this processor. 
RecVec WriteResDefs; RecVec ReadAdvanceDefs; @@ -260,6 +264,8 @@ struct CodeGenProcModel { Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef), RetireControlUnit(nullptr), LoadQueue(nullptr), StoreQueue(nullptr) {} + bool hasMacroFusions() const { return !MacroFusions.empty(); } + bool hasItineraries() const { return !ItinsDef->getValueAsListOfDefs("IID").empty(); } @@ -508,6 +514,9 @@ class CodeGenSchedModels { // Return true if any processors have itineraries. bool hasItineraries() const; + // Return true if any processors have MacroFusions. + bool hasMacroFusions() const; + // Get a SchedWrite from its index. const CodeGenSchedRW &getSchedWrite(unsigned Idx) const { assert(Idx < SchedWrites.size() && "bad SchedWrite index"); @@ -610,6 +619,8 @@ class CodeGenSchedModels { void collectProcUnsupportedFeatures(); + void collectMacroFusions(); + void inferSchedClasses(); void checkMCInstPredicates() const; diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp new file mode 100644 index 0000000000000..46a62c8cc6d30 --- /dev/null +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -0,0 +1,234 @@ +//===------ MacroFusionPredicatorEmitter.cpp - Generator for Fusion ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// MacroFusionPredicatorEmitter implements a TableGen-driven predicators +// generator for macro-op fusions. 
+// +//===---------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "PredicateExpander.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "macro-fusion-predicator" + +namespace { +class MacroFusionPredicatorEmitter { + RecordKeeper &Records; + CodeGenTarget Target; + + void emitMacroFusionEnum(std::vector Fusions, PredicateExpander &PE, + raw_ostream &OS); + void emitMacroFusionDecl(std::vector Fusions, PredicateExpander &PE, + raw_ostream &OS); + void emitMacroFusionImpl(std::vector Fusions, PredicateExpander &PE, + raw_ostream &OS); + void emitPredicates(std::vector &FirstPredicate, + PredicateExpander &PE, raw_ostream &OS); + void emitFirstPredicate(Record *SecondPredicate, PredicateExpander &PE, + raw_ostream &OS); + void emitSecondPredicate(Record *SecondPredicate, PredicateExpander &PE, + raw_ostream &OS); + void emitBothPredicate(Record *Predicates, PredicateExpander &PE, + raw_ostream &OS); + +public: + MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {} + + void run(raw_ostream &OS); +}; +} // End anonymous namespace. + +void MacroFusionPredicatorEmitter::emitMacroFusionEnum( + std::vector Fusions, PredicateExpander &PE, raw_ostream &OS) { + unsigned N = Fusions.size(); + if (N == 0) + return; + // 256 is `MaxMacroFusions` defined in MCSchedule.h + if (N > 256) + PrintFatalError("Too many macro fusions! 
Please bump MaxMacroFusions!"); + + OS << "#ifdef GET_MACRO_FUSION_ENUM\n\n"; + OS << "namespace llvm {\n"; + OS << "namespace " << Target.getName() << " {\n"; + OS << "enum {\n"; + + for (unsigned Index = 0; Index < N; Index++) { + Record *Fusion = Fusions[Index]; + // Get and emit name + OS << " " << Fusion->getName() << " = " << Index << ",\n"; + } + + OS << "};\n"; + OS << "} // end namespace " << Target.getName() << "\n"; + OS << "} // end namespace llvm\n\n"; + OS << "#endif\n"; + OS << "#undef GET_MACRO_FUSION_ENUM\n\n"; +} + +void MacroFusionPredicatorEmitter::emitMacroFusionDecl( + std::vector Fusions, PredicateExpander &PE, raw_ostream &OS) { + OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n"; + + for (Record *Fusion : Fusions) { + OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, " + << "const TargetSubtargetInfo &, " + << "const MachineInstr *, " + << "const MachineInstr &);\n"; + } + + OS << "\n#endif\n"; + OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n"; +} + +void MacroFusionPredicatorEmitter::emitMacroFusionImpl( + std::vector Fusions, PredicateExpander &PE, raw_ostream &OS) { + OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n"; + + for (Record *Fusion : Fusions) { + std::vector Predicates = + Fusion->getValueAsListOfDefs("Predicates"); + + OS << "bool is" << Fusion->getName() << "(\n"; + OS.indent(5) << "const TargetInstrInfo &TII,\n"; + OS.indent(5) << "const TargetSubtargetInfo &STI,\n"; + OS.indent(5) << "const MachineInstr *FirstMI,\n"; + OS.indent(5) << "const MachineInstr &SecondMI) {\n"; + OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n"; + + emitPredicates(Predicates, PE, OS); + + OS.indent(2) << "return true;\n"; + OS << "}\n"; + } + + OS << "\n#endif\n"; + OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n"; +} + +void MacroFusionPredicatorEmitter::emitPredicates( + std::vector &Predicates, PredicateExpander &PE, 
raw_ostream &OS) { + for (Record *Predicate : Predicates) { + Record *Target = Predicate->getValueAsDef("Target"); + if (Target->getName() == "first") + emitFirstPredicate(Predicate, PE, OS); + else if (Target->getName() == "second") + emitSecondPredicate(Predicate, PE, OS); + else if (Target->getName() == "both") + emitBothPredicate(Predicate, PE, OS); + else + PrintFatalError(Target->getLoc(), + "Unsupported 'FusionTarget': " + Target->getName()); + } +} + +void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if (Predicate->isSubClassOf("WildcardPred")) { + OS.indent(2) << "if (!FirstMI)\n"; + OS.indent(2) << " return " + << (Predicate->getValueAsBit("ReturnValue") ? "true" : "false") + << ";\n"; + } else if (Predicate->isSubClassOf("OneUsePred")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "Register FirstDest = FirstMI->getOperand(0).getReg();\n"; + OS.indent(4) + << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else if (Predicate->isSubClassOf( + "FirstFusionPredicateWithMCInstPredicate")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; + OS.indent(4) << "if ("; + PE.setNegatePredicate(true); + PE.setIndentLevel(3); + PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); + OS << ")\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else { + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for first instruction: " + + Predicate->getType()->getAsString()); + } +} + +void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; + OS.indent(4) << "if ("; + PE.setNegatePredicate(true); + PE.setIndentLevel(3); + 
PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); + OS << ")\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else { + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for first instruction: " + + Predicate->getType()->getAsString()); + } +} + +void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if (Predicate->isSubClassOf("FusionPredicateWithCode")) + OS << Predicate->getValueAsString("Predicate"); + else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) { + Record *MCPred = Predicate->getValueAsDef("Predicate"); + emitFirstPredicate(MCPred, PE, OS); + emitSecondPredicate(MCPred, PE, OS); + } else if (Predicate->isSubClassOf("TieReg")) { + int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx"); + int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx"); + OS.indent(2) << "if (!(FirstMI->getOperand(" << FirstOpIdx + << ").isReg() &&\n"; + OS.indent(2) << " SecondMI.getOperand(" << SecondOpIdx + << ").isReg() &&\n"; + OS.indent(2) << " FirstMI->getOperand(" << FirstOpIdx + << ").getReg() == SecondMI.getOperand(" << SecondOpIdx + << ").getReg()))\n"; + OS.indent(2) << " return false;\n"; + } else + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for both instruction: " + + Predicate->getType()->getAsString()); +} + +void MacroFusionPredicatorEmitter::run(raw_ostream &OS) { + // Emit file header. + emitSourceFileHeader("Macro Fusion Predicators", OS); + + PredicateExpander PE(Target.getName()); + PE.setByRef(false); + PE.setExpandForMC(false); + + std::vector Fusions = Records.getAllDerivedDefinitions("Fusion"); + // Sort macro fusions by name. 
+  sort(Fusions, LessRecord());
+  emitMacroFusionEnum(Fusions, PE, OS);
+  emitMacroFusionDecl(Fusions, PE, OS);
+  emitMacroFusionImpl(Fusions, PE, OS);
+}
+
+static TableGen::Emitter::OptClass<MacroFusionPredicatorEmitter> // Emitter type must be explicit; it cannot be deduced.
+    X("gen-macro-fusion-pred", "Generate macro fusion predicators.");
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index 8f96d3307ded8..d3a73e02cd916 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -194,6 +194,11 @@ void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) {
      << "getOperand(" << OpIndex << ").isReg() ";
 }
 
+void PredicateExpander::expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex) {
+  OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
+     << "getOperand(" << OpIndex << ").getReg().isVirtual()"; // Emitted text calls getReg() with no isReg() test.
+}
+
 void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
   OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
      << "getOperand(" << OpIndex << ").isImm() ";
@@ -319,6 +324,9 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
   if (Rec->isSubClassOf("CheckIsRegOperand"))
     return expandCheckIsRegOperand(OS, Rec->getValueAsInt("OpIndex"));
 
+  if (Rec->isSubClassOf("CheckIsVRegOperand"))
+    return expandCheckIsVRegOperand(OS, Rec->getValueAsInt("OpIndex"));
+
   if (Rec->isSubClassOf("CheckIsImmOperand"))
     return expandCheckIsImmOperand(OS, Rec->getValueAsInt("OpIndex"));
 
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 27f049a715aad..cfb0a3d51e677 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -75,6 +75,7 @@ class PredicateExpander {
                               bool IsCheckAll);
   void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName);
   void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex);
+  void expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex);
   void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index f7a7172d61fc6..461bc67a1ab60 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -16,6 +16,7 @@
 #include "PredicateExpander.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInstrItineraries.h"
@@ -133,6 +134,8 @@ class SubtargetEmitter {
   void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
 
   void EmitSchedModel(raw_ostream &OS);
+  void emitMacroFusionBits(const CodeGenProcModel &ProcModel, raw_ostream &OS);
+  void emitMacroFusionTable(RecVec Fusions, raw_ostream &OS);
   void EmitHwModeCheck(const std::string &ClassName,
raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS);
 
@@ -869,6 +872,17 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
   OS << "};\n";
 }
 
+void SubtargetEmitter::emitMacroFusionBits(const CodeGenProcModel &ProcModel,
+                                           raw_ostream &OS) {
+  OS << "\nstatic const MacroFusionBitset " << ProcModel.ModelName
+     << "MacroFusionBits = {\n";
+  std::vector<std::string> Predicates; // One "<Target>::<def name>" initializer per fusion of this model.
+  for (auto *R : ProcModel.MacroFusions)
+    Predicates.push_back(" " + Target + "::" + R->getNameInitAsString());
+  OS << llvm::join(Predicates, ",\n");
+  OS << "\n};\n";
+}
+
 // Find the WriteRes Record that defines processor resources for this
 // SchedWrite.
 Record *SubtargetEmitter::FindWriteResources(
@@ -1441,6 +1455,8 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     else if(!PM.ProcResourceDefs.empty())
       PrintFatalError(PM.ModelDef->getLoc(), "SchedMachineModel defines "
                     "ProcResources without defining WriteRes SchedWriteRes");
+    if (PM.hasMacroFusions())
+      emitMacroFusionBits(PM, OS);
 
     // Begin processor itinerary properties
     OS << "\n";
@@ -1487,7 +1503,11 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     if (PM.hasExtraProcessorInfo())
       OS << " &" << PM.ModelName << "ExtraInfo,\n";
     else
-      OS << " nullptr // No extra processor descriptor\n";
+      OS << " nullptr, // No extra processor descriptor\n";
+    if (PM.hasMacroFusions()) {
+      OS << " &" << PM.ModelName << "MacroFusionBits,\n";
+    } else
+      OS << " nullptr, // No macro fusions\n";
     OS << "};\n";
   }
 }
@@ -1770,6 +1790,25 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
     PE.expandSTIPredicate(OS, Fn);
 }
 
+void SubtargetEmitter::emitMacroFusionTable(RecVec Fusions, raw_ostream &OS) {
+  OS << "const llvm::MacroFusionEntry " << Target << "MacroFusionTable[] = {\n";
+
+  SmallSet<StringRef, 4> Names; // Names emitted so far; NOTE(review): inline size 4 is a guess.
+  for (auto &Fusion : Fusions) {
+    StringRef Name = Fusion->getValueAsString("Name");
+    if (Name.empty())
+      PrintFatalError(Fusion->getLoc(),
+                      "The name of macro fusion cannot be empty");
+    if (!Names.insert(Name).second) // Record the name; insert() reports false for a repeat.
+      PrintFatalError(Fusion->getLoc(),
+                      "The name of macro fusion already exists");
+    OS.indent(2) << "{\"" << Name << "\", "
+                 << "llvm::is" + Fusion->getNameInitAsString() << "},\n";
+  }
+
+  OS << "};\n\n";
+}
+
 void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
                                        raw_ostream &OS) {
   const CodeGenHwModes &CGH = TGT.getHwModes();
@@ -2001,6 +2040,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "\n#ifdef GET_SUBTARGETINFO_CTOR\n";
   OS << "#undef GET_SUBTARGETINFO_CTOR\n\n";
 
+  std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
+  // Sort by the "Name" field so the table order agrees with MacroFusionEntry::operator<.
+  llvm::sort(Fusions, LessRecordFieldName());
+
+  if (!Fusions.empty())
+    emitMacroFusionTable(Fusions, OS);
+
   OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n\n";
   OS << "namespace llvm {\n";
   OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
@@ -2035,11 +2081,15 @@ void SubtargetEmitter::run(raw_ostream &OS) {
        << Target << "ReadAdvanceTable, ";
   OS << '\n'; OS.indent(24);
   if (SchedModels.hasItineraries()) {
-    OS << Target << "Stages, "
-       << Target << "OperandCycles, "
-       << Target << "ForwardingPaths";
+    OS << Target << "Stages, " << Target << "OperandCycles, " << Target
+       << "ForwardingPaths, ";
   } else
-    OS << "nullptr, nullptr, nullptr";
+    OS << "nullptr, nullptr, nullptr, ";
+  if (!Fusions.empty()) {
+    OS << "ArrayRef(" << Target << "MacroFusionTable, " << Fusions.size()
+       << ")";
+  } else
+    OS << "std::nullopt";
   OS << ") {}\n\n";
 
   EmitSchedModelHelpers(ClassName, OS);