diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index a190f0dac1379..d290f202f3cca 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5596,6 +5596,24 @@ static bool FindAllMemoryUses(
       continue;
     }
 
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
+      // Let the target report which operands of this intrinsic it treats as
+      // addresses (PtrOps) and the type accessed through them (AccessTy).
+      SmallVector<Value *, 2> PtrOps;
+      Type *AccessTy;
+      if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
+        return true;
+
+      // Only record the use when the value is one of those address operands.
+      // Use is_contained rather than find: find yields an iterator, which is
+      // never null, so negating it would make this check dead.
+      if (!is_contained(PtrOps, U.get()))
+        return true;
+
+      MemoryUses.push_back({&U, AccessTy});
+      continue;
+    }
+
     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
       if (CI->hasFnAttr(Attribute::Cold)) {
         // If this is a cold call, we can sink the addressing calculation into
diff --git a/llvm/test/CodeGen/AMDGPU/sink-addr-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/sink-addr-memory-intrinsics.ll
index df30e3755490f..970c98afd2a05 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-addr-memory-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-addr-memory-intrinsics.ll
@@ -6,25 +6,23 @@ define amdgpu_kernel void @memoryIntrinstic(ptr addrspace(3) %inptr, i1 %cond, p
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    s_and_b32 s1, s1, 1
-; CHECK-NEXT:    s_add_i32 s3, s0, 0x2000
-; CHECK-NEXT:    s_cmp_eq_u32 s1, 0
+; CHECK-NEXT:    s_bitcmp0_b32 s1, 0
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %else
-; CHECK-NEXT:    v_mov_b32_e32 v0, s3
-; CHECK-NEXT:    ds_read_b64_tr_b16 v[2:3], v0
-; CHECK-NEXT:    s_mov_b32 s0, 0x7060302
-; CHECK-NEXT:    s_mov_b32 s1, 0x5040100
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    ds_read_b64_tr_b16 v[2:3], v0 offset:8192
+; CHECK-NEXT:    s_mov_b32 s1, 0x7060302
+; CHECK-NEXT:    s_mov_b32 s3, 0x5040100
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_perm_b32 v0, v3, v2, s0
-; CHECK-NEXT:    v_perm_b32 v1, v3, v2, s1
+; CHECK-NEXT:    v_perm_b32 v0, v3, v2, s1
+; CHECK-NEXT:    v_perm_b32 v1, v3, v2, s3
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_3
 ; CHECK-NEXT:    s_branch .LBB0_4
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    ; implicit-def: $vgpr1
 ; CHECK-NEXT:  .LBB0_3: ; %then
-; CHECK-NEXT:    v_mov_b32_e32 v0, s3
-; CHECK-NEXT:    ds_read_b64_tr_b16 v[2:3], v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    ds_read_b64_tr_b16 v[2:3], v0 offset:8192
 ; CHECK-NEXT:    s_mov_b32 s0, 0x5040100
 ; CHECK-NEXT:    s_mov_b32 s1, 0x7060302
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)