diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 7bcc128cb114f..dbb488aedb81b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -11,6 +11,7 @@
 
 #include "AMDGPUAliasAnalysis.h"
 #include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Instructions.h"
 
@@ -112,5 +113,14 @@ ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc,
       AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
     return ModRefInfo::NoModRef;
 
+  // A `readonly noalias` function argument normally only gets a `Ref` mask.
+  // However, if the calling convention of the function is one intended for
+  // program entry points, we know that such an argument will be invariant
+  // over the life of the program.
+  if (const auto *Arg = dyn_cast<Argument>(Base)) {
+    if (AMDGPU::isKernelCC(Arg->getParent()) && Arg->hasNoAliasAttr() &&
+        Arg->onlyReadsMemory())
+      return ModRefInfo::NoModRef;
+  }
   return ModRefInfo::ModRef;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll
new file mode 100644
index 0000000000000..cfc64857040db
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
+
+define amdgpu_cs void @load_global_is_invariant(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) {
+  ; CHECK-LABEL: name: load_global_is_invariant
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (invariant load (s32) from %ir.x, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1)
+  ; CHECK-NEXT:   S_ENDPGM 0
+  %v = load float, ptr addrspace(1) %x
+  store float %v, ptr addrspace(1) %y
+  ret void
+}
+
+define void @load_global_isnt_invariant_non_kernel(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) {
+  ; CHECK-LABEL: name: load_global_isnt_invariant_non_kernel
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.x, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1)
+  ; CHECK-NEXT:   SI_RETURN
+  %v = load float, ptr addrspace(1) %x
+  store float %v, ptr addrspace(1) %y
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll
index 4e945951dab62..6a3da62c6610c 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll
@@ -110,3 +110,19 @@ define amdgpu_kernel void @nonconst_gv_constant_as() {
   store i32 0, ptr addrspace(4) @global_nonconstant_constant_as
   ret void
 }
+
+define amdgpu_kernel void @constant_kernel_args(ptr addrspace(1) readonly noalias inreg %x) {
+; CHECK-LABEL: @constant_kernel_args(
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr addrspace(1) %x
+  ret void
+}
+
+define amdgpu_cs void @constant_cs_args(ptr addrspace(1) readonly noalias %x) {
+; CHECK-LABEL: @constant_cs_args(
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr addrspace(1) %x
+  ret void
+}