4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl
@@ -83,7 +83,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi
// CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
//
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
@@ -93,7 +93,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,
// CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
//
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {
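(Note on the updated checks above: align 4294967296 is 2^32, i.e. Value::MaximumAlignment in LLVM; it presumably appears here because the base pointer operand is null, which is treated as maximally aligned.)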
118 changes: 117 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1584,6 +1584,117 @@ AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
}

struct AAAMDGPUMakeBufferRsrcAlign
Contributor (review comment): I'd make it something like AAAMDGPUAlign, and then use it to deal with all AMDGPU related alignments.

Contributor (review comment): Also, document the new class.
: public IRAttribute<
Attribute::Alignment,
StateWrapper<IncIntegerState<uint64_t, Value::MaximumAlignment, 1>,
AbstractAttribute>,
AAAMDGPUMakeBufferRsrcAlign> {
using Base = IRAttribute<
Attribute::Alignment,
StateWrapper<IncIntegerState<uint64_t, Value::MaximumAlignment, 1>,
AbstractAttribute>,
AAAMDGPUMakeBufferRsrcAlign>;

AAAMDGPUMakeBufferRsrcAlign(const IRPosition &IRP, Attributor &A)
: Base(IRP) {}

void initialize(Attributor &A) override {}

ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = getIRPosition().getCtxI();
const auto *AlignAA = A.getAAFor<AAAlign>(
*this, IRPosition::value(*(I->getOperand(0))), DepClassTy::REQUIRED);
if (AlignAA)
return clampStateAndIndicateChange<StateType>(
this->getState(), AlignAA->getAssumedAlign().value());

return indicatePessimisticFixpoint();
}

/// Create an abstract attribute view for the position \p IRP.
static AAAMDGPUMakeBufferRsrcAlign &createForPosition(const IRPosition &IRP,
Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_RETURNED)
if (Instruction *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
return *new (A.Allocator) AAAMDGPUMakeBufferRsrcAlign(IRP, A);
llvm_unreachable("AAAMDGPUMakeBufferRsrcAlign is only valid for call site "
"return position on make.buffer.rsrc intrinsic");
}

// Manifest the deduced alignment on users of the buffer resource as well.
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;

// Check for users that allow alignment annotations.
Value &AssociatedValue = getAssociatedValue();
if (isa<ConstantData>(AssociatedValue))
return ChangeStatus::UNCHANGED;

for (const Use &U : AssociatedValue.uses()) {
if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
if (SI->getPointerOperand() == &AssociatedValue) {
if (SI->getAlign() > getAssumedAlign()) {
SI->setAlignment(getAssumedAlign());
Changed = ChangeStatus::CHANGED;
}
}
} else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
if (LI->getPointerOperand() == &AssociatedValue) {
if (LI->getAlign() > getAssumedAlign()) {
LI->setAlignment(getAssumedAlign());
Changed = ChangeStatus::CHANGED;
}
}
} else if (auto *RMW = dyn_cast<AtomicRMWInst>(U.getUser())) {
if (RMW->getAlign() > getAssumedAlign()) {
RMW->setAlignment(getAssumedAlign());
Changed = ChangeStatus::CHANGED;
}
} else if (auto *CAS = dyn_cast<AtomicCmpXchgInst>(U.getUser())) {
if (CAS->getAlign() > getAssumedAlign()) {
CAS->setAlignment(getAssumedAlign());
Changed = ChangeStatus::CHANGED;
}
}
}

// Manifest on the intrinsic call site itself.
Changed |= Base::manifest(A);

return Changed;
}

StringRef getName() const override { return "AAAMDGPUMakeBufferRsrcAlign"; }

const std::string getAsStr(Attributor *) const override {
std::string Buffer = "AAAMDGPUMakeBufferRsrcAlign[";
raw_string_ostream OS(Buffer);
OS << getState().getKnown() << ',' << getState().getAssumed() << ']';
return OS.str();
}

const char *getIdAddr() const override { return &ID; }

void trackStatistics() const override {}

Align getAssumedAlign() const { return Align(getAssumed()); }

void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (getAssumedAlign() > 1)
Attrs.emplace_back(
Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
}

/// Unique ID (due to the unique address)
static const char ID;
};

const char AAAMDGPUMakeBufferRsrcAlign::ID = 0;
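Following up on the review comments above, here is a minimal sketch of what the suggested generalization could look like; the AAAMDGPUAlign name and the doc comment wording are assumptions, not part of this patch:

/// Deduce AMDGPU-specific alignment facts. Currently this covers the buffer
/// resource returned by llvm.amdgcn.make.buffer.rsrc: the alignment of its
/// base pointer operand is attached to the call site and used to clamp
/// over-aligned memory accesses through the resource.
struct AAAMDGPUAlign
    : public IRAttribute<
          Attribute::Alignment,
          StateWrapper<IncIntegerState<uint64_t, Value::MaximumAlignment, 1>,
                       AbstractAttribute>,
          AAAMDGPUAlign> {
  using Base = IRAttribute<
      Attribute::Alignment,
      StateWrapper<IncIntegerState<uint64_t, Value::MaximumAlignment, 1>,
                   AbstractAttribute>,
      AAAMDGPUAlign>;

  AAAMDGPUAlign(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  // ...same deduction and manifest logic as AAAMDGPUMakeBufferRsrcAlign
  // above, with room to fold in other AMDGPU alignment sources later.
};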

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AMDGPUAttributorOptions Options,
ThinOrFullLTOPhase LTOPhase) {
@@ -1603,7 +1714,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
&AAAMDGPUClusterDims::ID});
&AAAMDGPUClusterDims::ID, &AAAlign::ID,
&AAAMDGPUMakeBufferRsrcAlign::ID});

AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1657,6 +1769,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
Ptr = RMW->getPointerOperand();
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
Ptr = CmpX->getPointerOperand();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
A.getOrCreateAAFor<AAAMDGPUMakeBufferRsrcAlign>(
Contributor (review comment): The creation of the AA is weird here as well. I'd expect that we want to know the alignment of a load/store instruction, and we see the pointer is a buffer pointer, then we do something. Here it looks like it is completely upside down.

IRPosition::value(*II));

if (Ptr) {
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
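Regarding the review comment above about the AA creation looking upside down, here is a minimal sketch of the flow the reviewer seems to have in mind, starting from the memory access rather than from the intrinsic; the use of getUnderlyingObject and the exact placement are assumptions, not code from this patch:

      if (Ptr) {
        // Start from the access: if the accessed pointer is ultimately
        // produced by llvm.amdgcn.make.buffer.rsrc, seed the alignment AA on
        // that call site so the deduced alignment can flow back to the access.
        if (auto *II = dyn_cast<IntrinsicInst>(getUnderlyingObject(Ptr)))
          if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
            A.getOrCreateAAFor<AAAMDGPUMakeBufferRsrcAlign>(
                IRPosition::value(*II));
      }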
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s

define float @load_gt_base(ptr align 4 %p) {
; CHECK-LABEL: define float @load_gt_base(
; CHECK-SAME: ptr align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 4 [[P]], i16 0, i64 0, i32 0)
; CHECK-NEXT: [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
; CHECK-NEXT: ret float [[LOADED]]
;
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
%loaded = load float, ptr addrspace(7) %ptr, align 8
ret float %loaded
}

define float @load_lt_base(ptr align 8 %p) {
; CHECK-LABEL: define float @load_lt_base(
; CHECK-SAME: ptr align 8 [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr align 8 [[P]], i16 0, i64 0, i32 0)
; CHECK-NEXT: [[LOADED:%.*]] = load float, ptr addrspace(7) [[PTR]], align 4
; CHECK-NEXT: ret float [[LOADED]]
;
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
%loaded = load float, ptr addrspace(7) %ptr, align 4
ret float %loaded
}