-
Notifications
You must be signed in to change notification settings - Fork 15.2k
AMDGPU: Render non-0 values for amdgpu-agpr-alloc #162300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Render non-0 values for amdgpu-agpr-alloc #162300
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesAMDGPU: Stop inferring amdgpu-agpr-alloc on irrelevant targets This only matters for subtargets with configurable AGPR allocation. AMDGPU: Render non-0 values for amdgpu-agpr-alloc This now tries to compute a lower bound on the number of registers Patch is 196.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162300.diff 26 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index e5206836e4946..e76f0e661d683 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1273,16 +1273,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
}
-// TODO: Migrate to range merge of amdgpu-agpr-alloc.
-struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
- using Base = StateWrapper<BooleanState, AbstractAttribute>;
- AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+struct AAAMDGPUMinAGPRAlloc
+ : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
+ using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
+ AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
- static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
- Attributor &A) {
+ static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
+ Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
- return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
- llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
+ return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
+ llvm_unreachable(
+ "AAAMDGPUMinAGPRAlloc is only valid for function position");
}
void initialize(Attributor &A) override {
@@ -1295,25 +1296,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
}
const std::string getAsStr(Attributor *A) const override {
- return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
+ std::string Str = "amdgpu-agpr-alloc=";
+ raw_string_ostream OS(Str);
+ OS << getAssumed();
+ return OS.str();
}
void trackStatistics() const override {}
ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
+ DecIntegerState<> Maximum;
- auto CheckForNoAGPRs = [&](Instruction &I) {
+ // Check for cases which require allocation of AGPRs. The only cases where
+ // AGPRs are required are if there are direct references to AGPRs, so inline
+ // assembly and special intrinsics.
+ auto CheckForMinAGPRAllocs = [&](Instruction &I) {
const auto &CB = cast<CallBase>(I);
const Value *CalleeOp = CB.getCalledOperand();
- const Function *Callee = dyn_cast<Function>(CalleeOp);
- if (!Callee) {
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
- return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
- return false;
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
+ // Technically, the inline asm could be invoking a call to an unknown
+ // external function that requires AGPRs, but ignore that.
+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
+ Maximum.takeAssumedMaximum(NumRegs);
+ return true;
}
- switch (Callee->getIntrinsicID()) {
+ switch (CB.getIntrinsicID()) {
case Intrinsic::not_intrinsic:
break;
case Intrinsic::write_register:
@@ -1323,7 +1332,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
auto [Kind, RegIdx, NumRegs] =
AMDGPU::parseAsmPhysRegName(RegName->getString());
- return Kind != 'a';
+ if (Kind == 'a')
+ Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
+
+ return true;
}
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1332,32 +1344,50 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
}
// TODO: Handle callsite attributes
- const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
- *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
- return CalleeInfo && CalleeInfo->isValidState() &&
- CalleeInfo->getAssumed();
+ auto *CBEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
+ if (!CBEdges || CBEdges->hasUnknownCallee()) {
+ Maximum.indicatePessimisticFixpoint();
+ return false;
+ }
+
+ for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
+ const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
+ *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
+ if (!CalleeInfo || !CalleeInfo->isValidState()) {
+ Maximum.indicatePessimisticFixpoint();
+ return false;
+ }
+
+ Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
+ }
+
+ return true;
};
bool UsedAssumedInformation = false;
- if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
+ if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
+
+ return clampStateAndIndicateChange(getState(), Maximum);
}
ChangeStatus manifest(Attributor &A) override {
- if (!getAssumed())
- return ChangeStatus::UNCHANGED;
LLVMContext &Ctx = getAssociatedFunction()->getContext();
- return A.manifestAttrs(getIRPosition(),
- {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
+ SmallString<4> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << getAssumed();
+
+ return A.manifestAttrs(
+ getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
}
- StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
+ StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
- /// AAAMDGPUNoAGPRs
+ /// AAAMDGPUMinAGPRAllocs
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
@@ -1365,7 +1395,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
static const char ID;
};
-const char AAAMDGPUNoAGPR::ID = 0;
+const char AAAMDGPUMinAGPRAlloc::ID = 0;
/// An abstract attribute to propagate the function attribute
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1533,10 +1563,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
- &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
- &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
- &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
+ &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
+ &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
+ &AAAMDGPUClusterDims::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1567,7 +1598,6 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
- A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
CallingConv::ID CC = F->getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
@@ -1578,6 +1608,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
if (!F->isDeclaration() && ST.hasClusters())
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+ if (ST.hasGFX90AInsts())
+ A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
+
for (auto &I : instructions(F)) {
Value *Ptr = nullptr;
if (auto *LI = dyn_cast<LoadInst>(&I))
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 2d7ef2c262157..98fbbe1a515ed 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
similarity index 67%
rename from llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
rename to llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 4b2ecd59aa9c2..3c5676e85bb8a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -70,7 +70,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg() {
define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -154,7 +154,7 @@ define void @func_uses_asm_physreg_agpr() {
define void @func_uses_asm_physreg_agpr_tuple() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -168,7 +168,7 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR27:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -192,7 +192,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: call void [[INDIRECT]]()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -204,8 +204,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR27]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -316,7 +316,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -342,7 +342,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -354,7 +354,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -378,7 +378,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -390,7 +390,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -402,7 +402,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -414,7 +414,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -426,7 +426,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
define amdgpu_kernel void @kernel_uses_asm_clobber() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -438,7 +438,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber() {
define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -450,7 +450,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -462,7 +462,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -474,7 +474,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -486,7 +486,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
define amdgpu_kernel void @kernel_uses_asm_vir...
[truncated]
|
The stack is messed up. |
It's not messed up, it intentionally includes #161957 as a separate pre-commit since I don't know a better way to deal with merge points across separate stacks |
2886cd6
to
0581b52
Compare
3b6aa65
to
57b2c89
Compare
0581b52
to
1cc1194
Compare
57b2c89
to
424d6ac
Compare
1cc1194
to
f55d72b
Compare
424d6ac
to
37f3077
Compare
f55d72b
to
fc465f1
Compare
fc465f1
to
b2b9ed7
Compare
This now tries to compute a lower bound on the number of registers for individual inline asm uses. Also starts using AACallEdges to handling indirect calls.
b2b9ed7
to
98db128
Compare
(#162300) This now tries to compute a lower bound on the number of registers for individual inline asm uses. Also starts using AACallEdges to handling indirect calls.
(llvm#162300) This now tries to compute a lower bound on the number of registers for individual inline asm uses. Also starts using AACallEdges to handling indirect calls.
This now tries to compute a lower bound on the number of registers
for individual inline asm uses. Also starts using AACallEdges
to handling indirect calls.