diff --git a/llvm/include/llvm/CodeGen/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
index bba4f1f025a06..2606b04a02060 100644
--- a/llvm/include/llvm/CodeGen/RegisterBankInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -435,7 +435,7 @@ class RegisterBankInfo {
 
   /// Get the MinimalPhysRegClass for Reg.
   /// \pre Reg is a physical register.
-  const TargetRegisterClass &
+  const TargetRegisterClass *
   getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
 
   /// Try to get the mapping of \p MI.
diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 27ed17b9f4f61..801b82e596739 100644
--- a/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -79,13 +79,13 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
 
 const RegisterBank *
 RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
                              const TargetRegisterInfo &TRI) const {
-  if (Reg.isPhysical()) {
+  if (!Reg.isVirtual()) {
     // FIXME: This was probably a copy to a virtual register that does have a
     // type we could use.
-    return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+    const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+    return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
   }
-  assert(Reg && "NoRegister does not have a register bank");
   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
   if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
     return RB;
@@ -94,16 +94,16 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
   return nullptr;
 }
 
-const TargetRegisterClass &
+const TargetRegisterClass *
 RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
                                          const TargetRegisterInfo &TRI) const {
   assert(Reg.isPhysical() && "Reg must be a physreg");
   const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
   if (RegRCIt != PhysRegMinimalRCs.end())
-    return *RegRCIt->second;
-  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+    return RegRCIt->second;
+  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
   PhysRegMinimalRCs[Reg] = PhysRC;
-  return *PhysRC;
+  return PhysRC;
 }
 
 const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
@@ -498,7 +498,7 @@ unsigned RegisterBankInfo::getSizeInBits(Register Reg,
     // Instead, we need to access a register class that contains Reg and
     // get the size of that register class.
     // Because this is expensive, we'll cache the register class by calling
-    auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+    auto *RC = getMinimalPhysRegClass(Reg, TRI);
     assert(RC && "Expecting Register class");
     return TRI.getRegSizeInBits(*RC);
   }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2b1646af175f9..4177a8307ad8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
 
 #include "AMDGPUCallLowering.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "SIFrameLowering.h"
 #include "SIISelLowering.h"
@@ -51,7 +52,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
   std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
 
 protected:
   // Basic subtarget description.
@@ -249,7 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return Legalizer.get();
   }
 
-  const RegisterBankInfo *getRegBankInfo() const override {
+  const AMDGPURegisterBankInfo *getRegBankInfo() const override {
     return RegBankInfo.get();
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7dfcca63542c0..e3f06fe8c3b27 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8403,6 +8403,22 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+  unsigned opcode = MI.getOpcode();
+  if (MI.isCopy()) {
+    const MachineOperand &srcOp = MI.getOperand(1);
+    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
+      const TargetRegisterClass *regClass =
+          RI.getPhysRegBaseClass(srcOp.getReg());
+      return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
+                                      : InstructionUniformity::NeverUniform;
+    }
+    return InstructionUniformity::Default;
+  }
+
+  // GMIR handling
+  if (MI.isPreISelOpcode())
+    return SIInstrInfo::getGenericInstructionUniformity(MI);
+
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
   // thread after the first sees the value written by the previous thread as
@@ -8429,49 +8445,32 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::Default;
   }
-  unsigned opcode = MI.getOpcode();
-  if (opcode == AMDGPU::COPY) {
-    const MachineOperand &srcOp = MI.getOperand(1);
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
-      return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
-                                      : InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::Default;
-  }
-  if (opcode == AMDGPU::INLINEASM || opcode == AMDGPU::INLINEASM_BR) {
-    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-    for (auto &op : MI.operands()) {
-      if (!op.isReg() || !op.isDef())
-        continue;
-      auto *RC = MRI.getRegClass(op.getReg());
-      if (!RC || RI.isDivergentRegClass(RC))
-        return InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::AlwaysUniform;
-  }
   if (opcode == AMDGPU::V_READLANE_B32 ||
       opcode == AMDGPU::V_READFIRSTLANE_B32)
     return InstructionUniformity::AlwaysUniform;
 
   if (opcode == AMDGPU::V_WRITELANE_B32)
     return InstructionUniformity::NeverUniform;
 
-  // GMIR handling
-  if (SIInstrInfo::isGenericOpcode(opcode))
-    return SIInstrInfo::getGenericInstructionUniformity(MI);
-
-  // Handling $vpgr reads
-  for (auto srcOp : MI.operands()) {
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
+
+  // FIXME: It's conceptually broken to report this for an instruction, and not
+  // a specific def operand. For inline asm in particular, there could be mixed
+  // uniform and divergent results.
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &SrcOp = MI.getOperand(I);
+    if (!SrcOp.isReg())
+      continue;
 
-      // If the class is missing it's an unallocatable scalar of some kind.
-      if (!regClass)
-        continue;
+    Register Reg = SrcOp.getReg();
+    if (!Reg || !SrcOp.readsReg())
+      continue;
 
-      if (RI.isVGPRClass(regClass))
-        return InstructionUniformity::NeverUniform;
-    }
+    // If RegBank is null, this is unassigned or an unallocatable special
+    // register, which are all scalars.
+    const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI);
+    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
+      return InstructionUniformity::NeverUniform;
   }
 
   // TODO: Uniformity check condtions above can be rearranged for more