Skip to content

Commit

Permalink
[AMDGPU] Set noclobber metadata on loads instead of cast to constant
Browse files Browse the repository at this point in the history
A load via pointer cast to constant will return true from
pointsToConstantMemory which is not necessarily so.

Fixes: SWDEV-326463

Differential Revision: https://reviews.llvm.org/D121172
  • Loading branch information
rampitec committed Mar 8, 2022
1 parent ed28461 commit 9eabea3
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 106 deletions.
12 changes: 5 additions & 7 deletions clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Expand Up @@ -18,7 +18,7 @@
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(4)* %x.coerce.const, align 4
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
Expand All @@ -30,7 +30,7 @@ __global__ void kernel1(int *x) {
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(4)* %x.coerce.const, align 4
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
Expand Down Expand Up @@ -68,8 +68,7 @@ struct S {
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[G2:%.*]] ={{.*}} addrspacecast i32* [[P0]] to i32 addrspace(4)*
// OPT: [[V0:%.*]] = load i32, i32 addrspace(4)* [[G2]], align 4
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
Expand Down Expand Up @@ -104,8 +103,7 @@ struct T {
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[G2:%.*]] ={{.*}} addrspacecast float* [[P0]] to float addrspace(4)*
// OPT: [[V0:%.*]] = load float, float addrspace(4)* [[G2]], align 4
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
// OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
Expand All @@ -132,7 +130,7 @@ struct SS {
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load float, float addrspace(4)* %a.coerce.const, align 4
// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
// OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4
// OPT: ret void
Expand Down
26 changes: 1 addition & 25 deletions llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
Expand Up @@ -131,31 +131,7 @@ bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
if (!LI->isSimple())
return false;

Value *Ptr = LI->getPointerOperand();

// Strip casts we have created earlier.
Value *OrigPtr = Ptr;
PointerType *PT;
for ( ; ; ) {
PT = cast<PointerType>(OrigPtr->getType());
if (PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
return false;
auto *P = dyn_cast<AddrSpaceCastInst>(OrigPtr);
if (!P)
break;
auto *NewPtr = P->getPointerOperand();
if (!cast<PointerType>(NewPtr->getType())->hasSameElementTypeAs(PT))
break;
OrigPtr = NewPtr;
}

IRBuilder<> B(LI);

PointerType *NewPT =
PointerType::getWithSamePointeeType(PT, AMDGPUAS::CONSTANT_ADDRESS);
Value *Cast = B.CreateAddrSpaceCast(OrigPtr, NewPT,
Twine(OrigPtr->getName(), ".const"));
LI->replaceUsesOfWith(Ptr, Cast);
LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
return true;
}

Expand Down

0 comments on commit 9eabea3

Please sign in to comment.