[NFC][AMDGPU] Construct MMO inside getTgtMemIntrinsic. #83554
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu

Author: None (sstipanovic)

Changes: In #73613 it was suggested to split the PR in two parts. This is the first part: constructing the MMO inside getTgtMemIntrinsic.

Patch is 946.89 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/83554.diff

63 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f2e00aab8d5da27..f58ae3acf03bc18 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -1160,6 +1161,9 @@ class TargetLoweringBase {
MaybeAlign align = Align(1); // alignment
MachineMemOperand::Flags flags = MachineMemOperand::MONone;
+
+ MachineMemOperand *MMO = nullptr;
+
IntrinsicInfo() = default;
};
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7c986dbbc2c7c88..d9a3aab5dcc28b1 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2643,8 +2643,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
else if (Info.fallbackAddressSpace)
MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
- MIB.addMemOperand(
- MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+
+ if (Info.MMO)
+ MIB.addMemOperand(Info.MMO);
+ else
+ MIB.addMemOperand(MF->getMachineMemOperand(
+ MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
}
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc2..459987604a4191a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5090,9 +5090,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
else if (Info.fallbackAddressSpace)
MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
- Info.memVT, MPI, Info.align, Info.flags,
- Info.size, I.getAAMetadata());
+ if (Info.MMO)
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, Info.MMO);
+ else
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, MPI, Info.align, Info.flags,
+ Info.size, I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84ef9679ab95635..3dceab87fb8c931 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,11 +17,14 @@
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -34,6 +37,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -1158,9 +1162,36 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
- Info.flags = MachineMemOperand::MONone;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ std::optional<unsigned> FallbackAddressSpace;
+ MaybeAlign BaseAlignment = Align(1);
+ PointerUnion<const Value *, const PseudoSourceValue *> PtrVal;
+ uint64_t Size = 0;
+ int Offset = 0;
+
+ auto CreateMMO = [&]() {
+ MachinePointerInfo PtrInfo;
+ if (PtrVal)
+ PtrInfo = MachinePointerInfo(PtrVal, Offset);
+ else if (FallbackAddressSpace)
+ PtrInfo = MachinePointerInfo(*FallbackAddressSpace);
+
+ if (!Size && Info.memVT.isScalableVector())
+ Size = MemoryLocation::UnknownSize;
+ else if (!Size)
+ Size = Info.memVT.getStoreSize();
+
+ Type *Ty = Info.memVT == MVT::iPTR
+ ? PointerType::get(CI.getContext(), 0)
+ : Info.memVT.getTypeForEVT(CI.getContext());
+ Align Alignment = MF.getDataLayout().getABITypeAlign(Ty);
+ return MF.getMachineMemOperand(PtrInfo, Flags, Size,
+ BaseAlignment.value_or(Alignment),
+ CI.getAAMetadata());
+ };
+
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
- Info.flags |= MachineMemOperand::MOInvariant;
+ Flags |= MachineMemOperand::MOInvariant;
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -1171,10 +1202,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
// TODO: Should images get their own address space?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
if (RsrcIntr->IsImage)
- Info.align.reset();
+ BaseAlignment.reset();
Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg);
if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) {
@@ -1184,13 +1215,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// those pointers. Cases like "this points at the same value
// but with a different offset" are handled in
// areMemAccessesTriviallyDisjoint.
- Info.ptrVal = RsrcArg;
+ PtrVal = RsrcArg;
}
auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
- Info.flags |= MachineMemOperand::MOVolatile;
- Info.flags |= MachineMemOperand::MODereferenceable;
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= MachineMemOperand::MODereferenceable;
if (ME.onlyReadsMemory()) {
unsigned MaxNumLanes = 4;
@@ -1213,7 +1244,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOLoad;
} else if (ME.onlyWritesMemory()) {
Info.opc = ISD::INTRINSIC_VOID;
@@ -1225,15 +1256,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
} else
Info.memVT = EVT::getEVT(DataTy);
- Info.flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOStore;
} else {
// Atomic
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable;
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Flags |= MachineMemOperand::MOVolatile;
switch (IntrID) {
default:
@@ -1246,11 +1279,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.ptrVal = CI.getArgOperand(1);
+ PtrVal = CI.getArgOperand(1);
+ Info.MMO = CreateMMO();
return true;
}
}
}
+ Info.MMO = CreateMMO();
return true;
}
@@ -1262,70 +1297,72 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_ds_fmax: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_buffer_atomic_fadd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
-
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.ptrVal = nullptr;
- Info.fallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = nullptr;
+ FallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
-
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_atomic_csub: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MOVolatile;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
@@ -1344,20 +1381,21 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOVolatile;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_load_tr: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
@@ -1372,25 +1410,29 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal = MFI->getGWSPSV(TM);
+ PtrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
- Info.size = 4;
- Info.align = Align(4);
+ Size = 4;
+ BaseAlignment = Align(4);
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
- Info.flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOLoad;
else
- Info.flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_load_lds: {
Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.ptrVal = CI.getArgOperand(1);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ PtrVal = CI.getArgOperand(1);
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
@@ -1400,14 +1442,15 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal = MFI->getGWSPSV(TM);
+ PtrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
- Info.size = 4;
- Info.align = Align(4);
+ Size = 4;
+ BaseAlignment = Align(4);
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index 9514bea86e4d171..22dccf39cff0593 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -29,7 +29,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -48,7 +48,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
@@ -63,7 +63,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -82,7 +82,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>)...
[truncated]
if (Info.MMO)
  MIB.addMemOperand(Info.MMO);
else
  MIB.addMemOperand(MF->getMachineMemOperand(
      MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
I think this is an awkward API; either the IntrinsicInfo struct should contain all of the fields necessary to construct the MMO, or every instance of getTgtMemIntrinsic should be required to create the MMO.
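For illustration, either alternative would collapse the consumer back to a single path; in the hunk quoted above that might look roughly like this (Info.Ordering, Info.FailureOrdering, and Info.SSID are hypothetical fields, not part of this patch):

// Alternative 1: IntrinsicInfo carries everything an MMO needs, and the
// caller always constructs the operand itself.
MIB.addMemOperand(MF->getMachineMemOperand(
    MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(), /*Ranges=*/nullptr,
    Info.SSID, Info.Ordering, Info.FailureOrdering));

// Alternative 2: every getTgtMemIntrinsic override is required to build the
// MMO, so the caller only attaches it.
MIB.addMemOperand(Info.MMO);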
I think it would be too much to change every instance of getTgtMemIntrinsic in one PR, but that could be the end goal.
Don't you just need to add ordering/syncscope fields?
Yes, but @nhaehnle suggested doing this refactoring.
I'd prefer to just add the fields. It's worse to have two parallel APIs hidden in one.
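A minimal sketch of what that could look like on the struct (these members are assumptions, not part of the patch):

struct IntrinsicInfo {
  // ... existing fields: opc, memVT, ptrVal, offset, size, align, flags ...
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic; // proposed addition
  SyncScope::ID SSID = SyncScope::System;              // proposed addition
};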
It's not just syncscope and ordering, but also failure ordering, AAInfo, and Ranges. Basically, you end up replicating MachineMemOperand in a second place, which I find pretty questionable.

I agree that the desirable end result is that everybody just creates MachineMemOperands. Adding the field in IntrinsicInfo in this way was my proposal to provide a smoother upgrade path, but perhaps an alternative can be found.

For example, perhaps a second overload

virtual MachineMemOperand *getTgtMemIntrinsic(const CallInst &Call, MachineFunction &MF, unsigned IntrinsicID)

could be added whose default implementation forwards to the currently-existing overload.
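A rough sketch of that default implementation, assuming it reuses the existing hook and the field-based MMO construction (not code from this patch):

// Hypothetical default: forward to the existing IntrinsicInfo-based hook and
// assemble the MMO from its fields, so targets can migrate one at a time.
virtual MachineMemOperand *getTgtMemIntrinsic(const CallInst &Call,
                                              MachineFunction &MF,
                                              unsigned IntrinsicID) const {
  IntrinsicInfo Info;
  if (!getTgtMemIntrinsic(Info, Call, MF, IntrinsicID))
    return nullptr;
  MachinePointerInfo PtrInfo =
      Info.ptrVal ? MachinePointerInfo(Info.ptrVal, Info.offset)
                  : MachinePointerInfo(Info.fallbackAddressSpace.value_or(0));
  uint64_t Size = Info.size ? Info.size : Info.memVT.getStoreSize();
  return MF.getMachineMemOperand(PtrInfo, Info.flags, Size,
                                 Info.align.value_or(Align(1)),
                                 Call.getAAMetadata());
}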
I think there's value in fully decoupling this from the Machine infrastructure, though; in principle we should be able to perform IR optimizations with intrinsic knowledge prior to codegen.
Fair enough. I don't care that much about this.