Skip to content

Commit d406c2c

Browse files
committed
AMDGPU: Use ConstantPool as source value for DAG lowered kernarg loads
This isn't quite a constant pool, but it is probably close enough for this purpose: we just need the address of some known invariant value to use as the source value. Aliasing queries against the real kernarg base pointer will falsely report no aliasing, but for invariant memory that probably doesn't matter.
1 parent 4be9e5b commit d406c2c

File tree

10 files changed

+433
-391
lines changed

10 files changed

+433
-391
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/CodeGen/FunctionLoweringInfo.h"
2222
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2323
#include "llvm/CodeGen/MachineFrameInfo.h"
24+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
2425
#include "llvm/IR/IntrinsicsAMDGPU.h"
2526

2627
#define DEBUG_TYPE "amdgpu-call-lowering"
@@ -414,7 +415,10 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
414415
MachineFunction &MF = B.getMF();
415416
const Function &F = MF.getFunction();
416417
const DataLayout &DL = F.getDataLayout();
417-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
418+
419+
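// This isn't really a constant pool but close enough.
// NOTE(review): the second argument of the PseudoSourceValue constructor of
// MachinePointerInfo appears to be the offset, not the address space; confirm
// that passing AMDGPUAS::CONSTANT_ADDRESS here is intended.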
420+
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool(),
421+
AMDGPUAS::CONSTANT_ADDRESS);
418422

419423
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
420424

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,17 @@ Intrinsic::ID AMDGPU::getIntrinsicID(const MachineInstr &I) {
2828

2929
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
3030
bool AMDGPU::isUniformMMO(const MachineMemOperand *MMO) {
31-
// FIXME: null value is should be treated as unknown, not as uniform.
3231
const Value *Ptr = MMO->getValue();
32+
if (!Ptr) {
33+
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
34+
return PSV->isConstantPool() || PSV->isStack() || PSV->isGOT() ||
35+
PSV->isJumpTable();
36+
}
37+
38+
// FIXME: null value should be treated as unknown, not as uniform.
39+
return true;
40+
}
41+
3342
// UndefValue means this is a load of a kernel input. These are uniform.
3443
// Sometimes LDS instructions have constant pointers.
3544
// If Ptr is null, then that means this mem operand contains a

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
3131
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
3232
#include "llvm/CodeGen/GlobalISel/Utils.h"
33+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
3334
#include "llvm/CodeGen/TargetOpcodes.h"
3435
#include "llvm/IR/DiagnosticInfo.h"
3536
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -2321,14 +2322,15 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23212322
return B.buildUnmerge(S32, Dst).getReg(1);
23222323
}
23232324

2324-
// TODO: can we be smarter about machine pointer info?
2325-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
23262325
Register LoadAddr = MRI.createGenericVirtualRegister(
23272326
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
23282327
// For code object version 5, private_base and shared_base are passed through
23292328
// implicit kernargs.
23302329
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
23312330
AMDGPU::AMDHSA_COV5) {
2331+
// TODO: can we be smarter about machine pointer info?
2332+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());
2333+
23322334
AMDGPUTargetLowering::ImplicitParameter Param =
23332335
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
23342336
: AMDGPUTargetLowering::PRIVATE_BASE;
@@ -2343,7 +2345,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23432345
return Register();
23442346

23452347
MachineMemOperand *MMO = MF.getMachineMemOperand(
2346-
PtrInfo,
2348+
PtrInfo.getWithOffset(Offset),
23472349
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
23482350
MachineMemOperand::MOInvariant,
23492351
LLT::scalar(32), commonAlignment(Align(64), Offset));
@@ -2361,6 +2363,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23612363
if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
23622364
return Register();
23632365

2366+
// TODO: can we be smarter about machine pointer info?
2367+
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
2368+
23642369
// Offset into amd_queue_t for group_segment_aperture_base_hi /
23652370
// private_segment_aperture_base_hi.
23662371
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
@@ -4709,6 +4714,14 @@ bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(
47094714
return true;
47104715
}
47114716

4717+
/// Build the MachinePointerInfo used as the base for kernarg segment loads:
/// the constant-pool pseudo source value of \p MF, tagged with
/// AMDGPUAS::CONSTANT_ADDRESS. Callers in this file add the byte offset of
/// the value being loaded with getWithOffset().
MachinePointerInfo
4718+
AMDGPULegalizerInfo::getKernargSegmentPtrInfo(MachineFunction &MF) const {
4719+
// This isn't really a constant pool but close enough: the kernarg loads only
// need some known invariant address to act as their source value.
4720+
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool());
4721+
// Explicitly mark the pointer as living in the constant address space.
PtrInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
4722+
return PtrInfo;
4723+
}
4724+
47124725
Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
47134726
int64_t Offset) const {
47144727
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -4736,8 +4749,8 @@ bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
47364749
"unexpected kernarg parameter type");
47374750

47384751
Register Ptr = getKernargParameterPtr(B, Offset);
4739-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
4740-
B.buildLoad(DstReg, Ptr, PtrInfo, Align(4),
4752+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());
4753+
B.buildLoad(DstReg, Ptr, PtrInfo.getWithOffset(Offset), Align(4),
47414754
MachineMemOperand::MODereferenceable |
47424755
MachineMemOperand::MOInvariant);
47434756
MI.eraseFromParent();
@@ -7260,9 +7273,9 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
72607273
return false;
72617274

72627275
// TODO: can we be smarter about machine pointer info?
7263-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
7276+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(MF);
72647277
MachineMemOperand *MMO = MF.getMachineMemOperand(
7265-
PtrInfo,
7278+
PtrInfo.getWithOffset(Offset),
72667279
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
72677280
MachineMemOperand::MOInvariant,
72687281
LLT::scalar(64), commonAlignment(Align(64), Offset));

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
132132
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
133133
unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
134134

135+
MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
135136
Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const;
136137
bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B,
137138
uint64_t Offset,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/MachineFrameInfo.h"
3636
#include "llvm/CodeGen/MachineFunction.h"
3737
#include "llvm/CodeGen/MachineLoopInfo.h"
38+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
3839
#include "llvm/CodeGen/SDPatternMatch.h"
3940
#include "llvm/IR/DiagnosticInfo.h"
4041
#include "llvm/IR/IRBuilder.h"
@@ -2265,6 +2266,14 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
22652266
return TargetLowering::isTypeDesirableForOp(Op, VT);
22662267
}
22672268

2269+
/// Build the MachinePointerInfo used as the base for DAG-lowered kernarg
/// loads: the constant-pool pseudo source value of \p MF, tagged with
/// AMDGPUAS::CONSTANT_ADDRESS. Callers in this file add the byte offset of
/// the argument being loaded with getWithOffset().
MachinePointerInfo
2270+
SITargetLowering::getKernargSegmentPtrInfo(MachineFunction &MF) const {
2271+
// This isn't really a constant pool but close enough: the kernarg loads only
// need some known invariant address to act as their source value.
2272+
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool());
2273+
// Explicitly mark the pointer as living in the constant address space.
PtrInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
2274+
return PtrInfo;
2275+
}
2276+
22682277
SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
22692278
const SDLoc &SL,
22702279
SDValue Chain,
@@ -2341,7 +2350,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23412350
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
23422351
uint64_t Offset, Align Alignment, bool Signed,
23432352
const ISD::InputArg *Arg) const {
2344-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
2353+
2354+
MachinePointerInfo PtrInfo =
2355+
getKernargSegmentPtrInfo(DAG.getMachineFunction());
23452356

23462357
// Try to avoid using an extload by loading earlier than the argument address,
23472358
// and extracting the relevant bits. The load should hopefully be merged with
@@ -2356,7 +2367,8 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23562367
// TODO: If we passed in the base kernel offset we could have a better
23572368
// alignment than 4, but we don't really need it.
23582369
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
2359-
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
2370+
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr,
2371+
PtrInfo.getWithOffset(AlignDownOffset), Align(4),
23602372
MachineMemOperand::MODereferenceable |
23612373
MachineMemOperand::MOInvariant);
23622374

@@ -2371,9 +2383,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23712383
}
23722384

23732385
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
2374-
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
2375-
MachineMemOperand::MODereferenceable |
2376-
MachineMemOperand::MOInvariant);
2386+
SDValue Load = DAG.getLoad(
2387+
MemVT, SL, Chain, Ptr, PtrInfo.getWithOffset(Offset), Alignment,
2388+
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
23772389

23782390
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
23792391
return DAG.getMergeValues({Val, Load.getValue(1)}, SL);
@@ -8109,10 +8121,11 @@ SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
81098121
MachineFunction &MF = DAG.getMachineFunction();
81108122
uint64_t Offset = getImplicitParameterOffset(MF, Param);
81118123
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
8112-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
8113-
return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
8114-
MachineMemOperand::MODereferenceable |
8115-
MachineMemOperand::MOInvariant);
8124+
MachinePointerInfo PtrInfo =
8125+
getKernargSegmentPtrInfo(DAG.getMachineFunction());
8126+
return DAG.getLoad(
8127+
VT, DL, DAG.getEntryNode(), Ptr, PtrInfo.getWithOffset(Offset), Alignment,
8128+
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
81168129
}
81178130

81188131
SDValue SITargetLowering::lowerTrapHsaQueuePtr(SDValue Op,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
4646
unsigned &NumIntermediates, MVT &RegisterVT) const override;
4747

4848
private:
49+
MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
50+
4951
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
5052
SDValue Chain, uint64_t Offset) const;
5153
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;

0 commit comments

Comments
 (0)