diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index a86939fc2ce8e..f2035c2787131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 store i32 %result, ptr %out, align 4
 ret void
@@ -1218,7 +1218,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
 store i32 %result, ptr %out, align 4
 ret void
@@ -1384,7 +1384,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
 ; GFX11-NEXT: buffer_gl1_inv
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -1470,7 +1470,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
 ; GFX11-NEXT: buffer_gl1_inv
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
 ret void
 }
@@ -1599,7 +1599,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
 %out.gep = getelementptr i32, ptr %out, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
 %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 store i32 %result, ptr %out.gep, align 4
 ret void
@@ -1706,7 +1706,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
 ; GFX11-NEXT: s_endpgm
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
 %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -1926,7 +1926,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
 ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 store i64 %result, ptr %out, align 4
 ret void
@@ -2102,7 +2102,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
 ; GFX11-NEXT: buffer_gl1_inv
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
@@ -2193,7 +2193,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1
 ; GFX11-NEXT: buffer_gl1_inv
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
@@ -2333,7 +2333,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
 %out.gep = getelementptr i64, ptr %out, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
 %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 store i64 %result, ptr %out.gep, align 4
 ret void
@@ -2444,7 +2444,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
 ; GFX11-NEXT: s_endpgm
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
 %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index 7958e40ea0e68..80c743cac4840 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -2525,7 +2525,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
 ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 store i32 %result, ptr %out, align 4
 ret void
@@ -2639,7 +2639,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
 ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX12-NEXT: flat_store_b32 v[0:1], v2
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
 store i32 %result, ptr %out, align 4
 ret void
@@ -2827,7 +2827,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
 ; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT: global_inv scope:SCOPE_DEV
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -2926,7 +2926,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
 ; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT: global_inv scope:SCOPE_SYS
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
 ret void
 }
@@ -3077,7 +3077,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
 %out.gep = getelementptr i32, ptr %out, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 store i32 %result, ptr %out.gep, align 4
 ret void
@@ -3201,7 +3201,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
 ; GFX12-NEXT: s_endpgm
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i32, ptr %ptr, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
 %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -3571,7 +3571,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
 ; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 store i64 %result, ptr %out, align 4
 ret void
@@ -3701,7 +3701,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
 ; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
 store i64 %result, ptr %out, align 4
 ret void
@@ -3901,7 +3901,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
 ; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT: global_inv scope:SCOPE_DEV
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
@@ -4006,7 +4006,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
 ; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT: global_inv scope:SCOPE_SYS
 ; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
@@ -4169,7 +4169,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
 %out.gep = getelementptr i64, ptr %out, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 store i64 %result, ptr %out.gep, align 4
 ret void
@@ -4297,7 +4297,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
 ; GFX12-NEXT: s_endpgm
 %id = call i32 @llvm.amdgcn.workitem.id.x()
 %gep.tid = getelementptr i64, ptr %ptr, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
 %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
index 6792612ded368..7766b3ad45962 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
@@ -108,7 +108,7 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
 ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT: buffer_inv sc1
 ; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
 %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret <2 x half> %result
 }
@@ -122,7 +122,7 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
 ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT: buffer_inv sc1
 ; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
 %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
index 85d852fc779b2..be9de72a4ea9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -153,7 +153,7 @@ body: |
 %2:vgpr(s32) = COPY $vgpr3
 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
 %4:vgpr(s64) = G_CONSTANT i64 4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
 %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %6

@@ -305,7 +305,7 @@ body: |
 %2:vgpr(s64) = COPY $vgpr4_vgpr5
 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
 %4:vgpr(s64) = G_CONSTANT i64 4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
 %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0)
 $vgpr0_vgpr1 = COPY %6

@@ -406,7 +406,7 @@ body: |
 %2:vgpr(s32) = COPY $vgpr3
 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
 %4:vgpr(s64) = G_CONSTANT i64 -4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
 %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %6

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
index dc317a8413cd5..3389ed72fe7d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
@@ -101,7 +101,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 2047
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %4

@@ -155,7 +155,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 2047
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)

 ...
@@ -211,7 +211,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 2048
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %4

@@ -265,7 +265,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 2048
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)

 ...
@@ -321,7 +321,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %4

@@ -375,7 +375,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)

 ...
@@ -463,7 +463,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 4097
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 $vgpr0 = COPY %4

@@ -547,7 +547,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 4097
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)

 ...
@@ -647,7 +647,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = COPY $vgpr2_vgpr3
 %2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
 $vgpr0_vgpr1 = COPY %4

@@ -701,7 +701,7 @@ body: |
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = COPY $vgpr2_vgpr3
 %2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
 %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)

 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index eba64b853ac05..5bfb2b2e4d578 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -492,7 +492,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -2048
- %2:vgpr(p0) = G_PTR_ADD %0, %1
+ %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
 $vgpr0 = COPY %3

@@ -561,7 +561,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 4095
- %2:vgpr(p0) = G_PTR_ADD %0, %1
+ %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
 $vgpr0 = COPY %3

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index e1325a0a0bc50..532b4bfee3320 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -1191,7 +1191,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1275,7 +1275,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1375,7 +1375,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1475,7 +1475,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1559,7 +1559,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 4095
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1659,7 +1659,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 4096
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1759,7 +1759,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -4095
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1859,7 +1859,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -4096
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -1959,7 +1959,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 8191
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -2059,7 +2059,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 8192
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -2159,7 +2159,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -8191
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -2259,7 +2259,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -8192
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -2359,7 +2359,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 8388607
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

@@ -2567,7 +2567,7 @@ body: |
 ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -8388608
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
 %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
 $vgpr0 = COPY %3

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index 6e92d851dee2e..5b65c0e3734e7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -1237,7 +1237,7 @@ body: |
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 2047
- %3:vgpr(p1) = G_PTR_ADD %0, %2
+ %3:vgpr(p1) = inbounds G_PTR_ADD %0, %2
 G_STORE %1, %3 :: (store (s32), align 4, addrspace 0)

 ...
@@ -1337,7 +1337,7 @@ body: |
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 8388607
- %3:vgpr(p1) = G_PTR_ADD %0, %2
+ %3:vgpr(p1) = inbounds G_PTR_ADD %0, %2
 G_STORE %1, %3 :: (store (s32), align 4, addrspace 0)

 ...
@@ -1545,7 +1545,7 @@ body: |
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 -8388608
- %3:vgpr(p1) = G_PTR_ADD %0, %2
+ %3:vgpr(p1) = inbounds G_PTR_ADD %0, %2
 G_STORE %1, %3 :: (store (s32), align 4, addrspace 0)

 ...
diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index 887f489d504f2..3266fde10f9fb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -25,7 +25,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
 ; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
 ; GFX12-GISEL-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %addr, i32 -4
+ %gep = getelementptr inbounds i32, ptr %addr, i32 -4
 %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
 ret void
 }
@@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
 ; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
 ; GFX12-GISEL-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %addr, i32 -4
+ %gep = getelementptr inbounds i32, ptr %addr, i32 -4
 %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
 ret void
 }
@@ -83,7 +83,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
 ; GFX12-GISEL-NEXT: flat_store_b32 v[0:1], v2
 ; GFX12-GISEL-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %addr, i32 4
+ %gep = getelementptr inbounds i32, ptr %addr, i32 4
 %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
 store i32 %val, ptr %use
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll
index 89555d3060883..d5fba2df0b828 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll
@@ -31,7 +31,7 @@ define amdgpu_ps void @flat_prefetch_offset(ptr %ptr) {
 ; GCN-NEXT: flat_prefetch_b8 v[0:1] offset:512
 ; GCN-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %ptr, i32 128
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 128
 tail call void @llvm.amdgcn.flat.prefetch(ptr %gep, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll
index 047a6ccf10d91..80f9eeb25ebc0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll
@@ -31,7 +31,7 @@ define amdgpu_ps void @global_prefetch_offset(ptr addrspace(1) %ptr) {
 ; GCN-NEXT: global_prefetch_b8 v[0:1], off offset:512
 ; GCN-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 128
+ %gep = getelementptr inbounds i32, ptr addrspace(1) %ptr, i32 128
 tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %gep, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
index 017d402f22b90..3377290ecb1e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
@@ -104,7 +104,7 @@ define amdgpu_ps void @flat_load_monitor_b32(ptr %addr, ptr addrspace(1) %use) {
 ; GFX1250-NEXT: global_store_b32 v[2:3], v0, off
 ; GFX1250-NEXT: s_endpgm
 entry:
- %gep = getelementptr i64, ptr addrspace(0) %addr, i32 4
+ %gep = getelementptr inbounds i64, ptr addrspace(0) %addr, i32 4
 %val = call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr addrspace(0) %gep, i32 10)
 store i32 %val, ptr addrspace(1) %use
 ret void
@@ -118,7 +118,7 @@ define amdgpu_ps void @flat_load_monitor_b64(ptr %addr, ptr addrspace(1) %use) {
 ; GFX1250-NEXT: global_store_b64 v[2:3], v[0:1], off
 ; GFX1250-NEXT: s_endpgm
 entry:
- %gep = getelementptr i64, ptr addrspace(0) %addr, i32 4
+ %gep = getelementptr inbounds i64, ptr addrspace(0) %addr, i32 4
 %val = call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr addrspace(0) %gep, i32 22)
 store <2 x i32> %val, ptr addrspace(1) %use
 ret void
@@ -132,7 +132,7 @@ define amdgpu_ps void @flat_load_monitor_b128(ptr %addr, ptr addrspace(1) %use)
 ; GFX1250-NEXT: global_store_b128 v[2:3], v[4:7], off
 ; GFX1250-NEXT: s_endpgm
 entry:
- %gep = getelementptr i64, ptr addrspace(0) %addr, i32 4
+ %gep = getelementptr inbounds i64, ptr addrspace(0) %addr, i32 4
 %val = call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr addrspace(0) %gep, i32 27)
 store <4 x i32> %val, ptr addrspace(1) %use
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
index d95fc77939e83..a3c38b17abf00 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
@@ -85,7 +85,7 @@ define i8 @flat_inst_valu_offset_1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:1
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 1
+ %gep = getelementptr inbounds i8, ptr %p, i64 1
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -160,7 +160,7 @@ define i8 @flat_inst_valu_offset_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:2047
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 2047
+ %gep = getelementptr inbounds i8, ptr %p, i64 2047
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -235,7 +235,7 @@ define i8 @flat_inst_valu_offset_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 4095
+ %gep = getelementptr inbounds i8, ptr %p, i64 4095
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -330,7 +330,7 @@ define i8 @flat_inst_valu_offset_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:8191
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8191
+ %gep = getelementptr inbounds i8, ptr %p, i64 8191
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -425,7 +425,7 @@ define i8 @flat_inst_valu_offset_24bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:8388607
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8388607
+ %gep = getelementptr inbounds i8, ptr %p, i64 8388607
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -511,7 +511,7 @@ define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-2048
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -2048
+ %gep = getelementptr inbounds i8, ptr %p, i64 -2048
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -597,7 +597,7 @@ define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-4096
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -4096
+ %gep = getelementptr inbounds i8, ptr %p, i64 -4096
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -683,7 +683,7 @@ define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-8192
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -8192
+ %gep = getelementptr inbounds i8, ptr %p, i64 -8192
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -769,7 +769,7 @@ define i8 @flat_inst_valu_offset_neg_24bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-8388608
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -8388608
+ %gep = getelementptr inbounds i8, ptr %p, i64 -8388608
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -845,7 +845,7 @@ define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 4095
+ %gep = getelementptr inbounds i8, ptr %p, i64 4095
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -940,7 +940,7 @@ define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:8191
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8191
+ %gep = getelementptr inbounds i8, ptr %p, i64 8191
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1035,7 +1035,7 @@ define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:16383
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 16383
+ %gep = getelementptr inbounds i8, ptr %p, i64 16383
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1139,7 +1139,7 @@ define i8 @flat_inst_valu_offset_2x_24bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 16777214
+ %gep = getelementptr inbounds i8, ptr %p, i64 16777214
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1225,7 +1225,7 @@ define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-4096
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -4096
+ %gep = getelementptr inbounds i8, ptr %p, i64 -4096
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1311,7 +1311,7 @@ define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-8192
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -8192
+ %gep = getelementptr inbounds i8, ptr %p, i64 -8192
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1397,7 +1397,7 @@ define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:-16384
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -16384
+ %gep = getelementptr inbounds i8, ptr %p, i64 -16384
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2835,7 +2835,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 1
+ %gep = getelementptr inbounds i8, ptr %p, i64 1
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -2925,7 +2925,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 2047
+ %gep = getelementptr inbounds i8, ptr %p, i64 2047
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3015,7 +3015,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 4095
+ %gep = getelementptr inbounds i8, ptr %p, i64 4095
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3127,7 +3127,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8191
+ %gep = getelementptr inbounds i8, ptr %p, i64 8191
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3239,7 +3239,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -2048
+ %gep = getelementptr inbounds i8, ptr %p, i64 -2048
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3351,7 +3351,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -4096
+ %gep = getelementptr inbounds i8, ptr %p, i64 -4096
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3463,7 +3463,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -8192
+ %gep = getelementptr inbounds i8, ptr %p, i64 -8192
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3553,7 +3553,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 4095
+ %gep = getelementptr inbounds i8, ptr %p, i64 4095
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3665,7 +3665,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8191
+ %gep = getelementptr inbounds i8, ptr %p, i64 8191
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3777,7 +3777,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 16383
+ %gep = getelementptr inbounds i8, ptr %p, i64 16383
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -3889,7 +3889,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -4096
+ %gep = getelementptr inbounds i8, ptr %p, i64 -4096
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4001,7 +4001,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -8192
+ %gep = getelementptr inbounds i8, ptr %p, i64 -8192
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4113,7 +4113,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -16384
+ %gep = getelementptr inbounds i8, ptr %p, i64 -16384
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void