-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] CodeGen for GFX12 VBUFFER instructions #75492
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
Changes:
Patch is 1.67 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/75492.diff
58 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 799e102d56174d..89403ac3df4e78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -816,6 +816,12 @@ def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
"Has single-use VGPR hint instructions"
>;
+def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
+ "HasRestrictedSOffset",
+ "true",
+ "Has restricted SOffset (immediate not supported)."
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1461,6 +1467,7 @@ def FeatureISAVersion12 : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
+ FeatureHasRestrictedSOffset,
FeatureVGPRSingleUseHintInsts,
FeatureMADIntraFwdBug]>;
@@ -1773,6 +1780,11 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;
+def HasRestrictedSOffset : Predicate<"Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>;
+def HasUnrestrictedSOffset : Predicate<"!Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>;
+
def D16PreservesUnusedBits :
Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index c61aab4a45c6ad..2b85024a9b40be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -105,6 +105,11 @@ def gi_global_saddr :
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
GIComplexPatternEquiv<MUBUFScratchOffset>;
+
+def gi_buf_soffset :
+ GIComplexOperandMatcher<s32, "selectBUFSOffset">,
+ GIComplexPatternEquiv<BUFSOffset>;
+
def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 66ba08ef0dc12a..98d90814f223c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1319,7 +1319,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SOffset = Subtarget->hasRestrictedSOffset()
+ ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
ConstantSDNode *C1 = nullptr;
SDValue N0 = Addr;
@@ -1374,7 +1376,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
return true;
}
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
// Legal offset for instruction.
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
@@ -1448,7 +1451,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
SDValue HighBits =
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
@@ -1482,8 +1485,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
// Therefore it should be safe to fold any VGPR offset on gfx9 into the
// MUBUF vaddr, but not on older subtargets which can only do this if the
// sign bit is known 0.
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue()) &&
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
CurDAG->SignBitIsZero(N0))) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
@@ -1515,6 +1519,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &Offset) const {
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc DL(Addr);
@@ -1531,14 +1536,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
if (Addr.getOpcode() == ISD::ADD) {
// Add (CopyFromReg <sgpr>) <constant>
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
+ if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
return false;
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
return false;
SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
- SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
+ TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
// <constant>
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
} else {
@@ -1555,8 +1560,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset
) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
@@ -1577,6 +1581,21 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
+ SDValue &SOffset) const {
+ if (Subtarget->hasRestrictedSOffset()) {
+ if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
+ if (SOffsetConst->isZero()) {
+ SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ SOffset = ByteOffsetNode;
+ return true;
+}
+
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 618c5e02c09406..374108af08cd5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -179,6 +179,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
+ bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 75fac09d0b99fa..3aff8bd86bf766 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3223,6 +3223,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
+ assert(!AMDGPU::isGFX12Plus(STI));
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();
@@ -3289,8 +3290,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
MIB.add(MI.getOperand(5 + OpOffset)); // soffset
MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
- MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
- MIB.addImm((Aux >> 3) & 1); // swz
+ MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
+ MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
@@ -4430,7 +4431,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// TODO: Should this be inside the render function? The iterator seems to
// move.
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
.addImm(Offset & ~MaxOffset);
@@ -4462,7 +4463,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
if (ConstOffset != 0) {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
+ if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
KB->signBitIsZero(PtrBase))) {
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
@@ -4541,6 +4542,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
if (isNoUnsignedWrap(AddrMI))
return true;
+ // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
+ // values.
+ if (AMDGPU::isGFX12Plus(STI))
+ return true;
+
Register LHS = AddrMI->getOperand(1).getReg();
Register RHS = AddrMI->getOperand(2).getReg();
@@ -4645,7 +4651,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
if (mi_match(Reg, *MRI,
m_GPtrAdd(m_Reg(BasePtr),
m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
- if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ if (!TII.isLegalMUBUFImmOffset(Offset))
return {};
MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
Register WaveBase = getWaveAddress(BasePtrDef);
@@ -4664,7 +4670,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
}
if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
- !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ !TII.isLegalMUBUFImmOffset(Offset))
return {};
return {{
@@ -4907,7 +4913,7 @@ bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
/// component.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
+ if (TII.isLegalMUBUFImmOffset(ImmOffset))
return;
// Illegal offset, store it in soffset.
@@ -5016,6 +5022,8 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -5044,6 +5052,8 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -5054,6 +5064,17 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
+
+ Register SOffset = Root.getReg();
+
+ if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
+ SOffset = AMDGPU::SGPR_NULL;
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
+
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
@@ -5448,14 +5469,19 @@ void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12));
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
+ const bool Swizzle = MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
+ : AMDGPU::CPol::SWZ_pregfx12);
+ MIB.addImm(Swizzle);
}
void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 00ff1747ce57a3..ab7cc0a6beb8c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -292,6 +292,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg,
Register &SOffset, int64_t &Offset) const;
+ InstructionSelector::ComplexRendererFns
+ selectBUFSOffset(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectMUBUFAddr64(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 489b4f5a8d86a5..d35c033723e604 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5412,7 +5412,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c06..afda6a058922d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1784,7 +1784,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 9832d89c6ac6f7..43d35fa5291ca0 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -12,6 +12,8 @@ def MUBUFOffset : ComplexPattern<iPTR, 3, "SelectMUBUFOffset">;
def MUBUFScratchOffen : ComplexPattern<iPTR, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<iPTR, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
+def BUFSOffset : ComplexPattern<iPTR, 1, "SelectBUFSOffset">;
+
def BUFAddrKind {
int Offset = 0;
int OffEn = 1;
@@ -521,11 +523,18 @@ class MUBUF_Addr64_Load_Pat <Instruction inst,
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
-multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
}
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
+ }
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
+}
+
multiclass MUBUF_Pseudo_Loads_Helper<string opName, ValueType load_vt,
bit TiedDest, bit isLds, bit isTFE, bit hasGFX12Enc> {
defvar legal_load_vt = !if(!eq(load_vt, v3f16), v4f16, load_vt);
@@ -606,7 +615,7 @@ class MUBUF_Store_Pseudo <string opName,
let tfe = isTFE;
}
-multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
def : GCNPat <
(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)),
@@ -617,6 +626,13 @@ multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SD
(!cast<MUBUF_Pseudo>(BaseInst # _ADDR64) store_vt:$vdata, i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset)>;
}
+multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
+ }
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
+}
+
multiclass MUBUF_Pseudo_Stores_Helper<string opName, ValueType store_vt,
bit isTFE, bit hasGFX12Enc> {
defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt);
@@ -1314,33 +1330,33 @@ def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
-multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MUBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_load<name, memoryVt>);
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxili...
[truncated]
|
|
df9a07c
to
1c0c739
Compare
We may also need to update the following files with GFX12 RUN lines (this can be done in a separate patch):
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
226230a
to
165bbf4
Compare
Rebased.
165bbf4
to
70b51f1
Compare
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_waitcnt vmcnt(0)
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GCN-GFX12-LABEL: v_extract_v64i32_varidx:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These prefixes look wrong. I removed them in 26ff659.
No description provided.