diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll index 4a66116bb2b59..e7350fec10546 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -define void @add_v3i16(<3 x i16> addrspace(1)* %ptra, <3 x i16> addrspace(1)* %ptrb, <3 x i16> addrspace(1)* %ptr2) { +define void @add_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -58,10 +58,10 @@ define void @add_v3i16(<3 x i16> addrspace(1)* %ptra, <3 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v2, off offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <3 x i16>, <3 x i16> addrspace(1)* %ptra, align 4 - %b = load <3 x i16>, <3 x i16> addrspace(1)* %ptrb, align 4 + %a = load <3 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <3 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <3 x i16> %a, %b - store <3 x i16> %add, <3 x i16> addrspace(1)* %ptr2, align 4 + store <3 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -85,7 +85,7 @@ define <3 x i16> @add_v3i16_arg(<3 x i16> %a, <3 x i16> %b) { ret <3 x i16> %add } -define void @add_v4i16(<4 x i16> addrspace(1)* %ptra, <4 x i16> addrspace(1)* %ptrb, <4 x i16> addrspace(1)* %ptr2) { +define void @add_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -113,10 +113,10 @@ define void @add_v4i16(<4 x i16> addrspace(1)* %ptra, <4 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <4 x i16>, <4 x i16> addrspace(1)* %ptra, align 4 - %b = load <4 x i16>, <4 x i16> addrspace(1)* %ptrb, align 4 + %a = load <4 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <4 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <4 x i16> %a, %b - store <4 x i16> %add, <4 x i16> addrspace(1)* %ptr2, align 4 + store <4 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -142,7 +142,7 @@ define <4 x i16> @add_v4i16_arg(<4 x i16> %a, <4 x i16> %b) { ret <4 x i16> %add } -define void @add_v5i16(<5 x i16> addrspace(1)* %ptra, <5 x i16> addrspace(1)* %ptrb, <5 x i16> addrspace(1)* %ptr2) { +define void @add_v5i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v5i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -231,10 +231,10 @@ define void @add_v5i16(<5 x i16> addrspace(1)* %ptra, <5 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:8 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <5 x i16>, <5 x i16> addrspace(1)* %ptra, align 4 - %b = load <5 x i16>, <5 x i16> addrspace(1)* %ptrb, align 4 + %a = load <5 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <5 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <5 x i16> %a, %b - store <5 x i16> %add, <5 x i16> addrspace(1)* %ptr2, align 4 + store <5 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -262,7 +262,7 @@ define <5 x i16> @add_v5i16_arg(<5 x i16> %a, <5 x i16> %b) 
{ ret <5 x i16> %add } -define void @add_v6i16(<6 x i16> addrspace(1)* %ptra, <6 x i16> addrspace(1)* %ptrb, <6 x i16> addrspace(1)* %ptr2) { +define void @add_v6i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v6i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -294,10 +294,10 @@ define void @add_v6i16(<6 x i16> addrspace(1)* %ptra, <6 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_dwordx3 v[4:5], v[0:2], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <6 x i16>, <6 x i16> addrspace(1)* %ptra, align 4 - %b = load <6 x i16>, <6 x i16> addrspace(1)* %ptrb, align 4 + %a = load <6 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <6 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <6 x i16> %a, %b - store <6 x i16> %add, <6 x i16> addrspace(1)* %ptr2, align 4 + store <6 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -327,7 +327,7 @@ define <6 x i16> @add_v6i16_arg(<6 x i16> %a, <6 x i16> %b) { ret <6 x i16> %add } -define void @addv_7i16(<7 x i16> addrspace(1)* %ptra, <7 x i16> addrspace(1)* %ptrb, <7 x i16> addrspace(1)* %ptr2) { +define void @addv_7i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: addv_7i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -449,10 +449,10 @@ define void @addv_7i16(<7 x i16> addrspace(1)* %ptra, <7 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v8, off offset:12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <7 x i16>, <7 x i16> addrspace(1)* %ptra, align 4 - %b = load <7 x i16>, <7 x i16> addrspace(1)* %ptrb, align 4 + %a = load <7 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <7 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <7 x i16> %a, %b - store <7 x i16> %add, <7 x i16> addrspace(1)* %ptr2, align 4 + store <7 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -484,7 +484,7 @@ define <7 x i16> @add_v7i16_arg(<7 x i16> %a, <7 x i16> %b) { ret <7 x i16> %add } -define void @add_v9i16(<9 x i16> addrspace(1)* %ptra, <9 x i16> addrspace(1)* %ptrb, <9 x i16> addrspace(1)* %ptr2) { +define void @add_v9i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v9i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -536,10 +536,10 @@ define void @add_v9i16(<9 x i16> addrspace(1)* %ptra, <9 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <9 x i16>, <9 x i16> addrspace(1)* %ptra, align 4 - %b = load <9 x i16>, <9 x i16> addrspace(1)* %ptrb, align 4 + %a = load <9 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <9 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <9 x i16> %a, %b - store <9 x i16> %add, <9 x i16> addrspace(1)* %ptr2, align 4 + store <9 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -575,7 +575,7 @@ define <9 x i16> @add_v9i16_arg(<9 x i16> %a, <9 x i16> %b) { ret <9 x i16> %add } -define void @add_v10i16(<10 x i16> addrspace(1)* %ptra, <10 x i16> addrspace(1)* %ptrb, <10 x i16> addrspace(1)* %ptr2) { +define void @add_v10i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v10i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -629,14 +629,14 @@ define void @add_v10i16(<10 x i16> addrspace(1)* %ptra, <10 x i16> addrspace(1)* ; GFX9-NEXT: 
global_store_dword v[4:5], v6, off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <10 x i16>, <10 x i16> addrspace(1)* %ptra, align 4 - %b = load <10 x i16>, <10 x i16> addrspace(1)* %ptrb, align 4 + %a = load <10 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <10 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <10 x i16> %a, %b - store <10 x i16> %add, <10 x i16> addrspace(1)* %ptr2, align 4 + store <10 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } -define void @add_v11i16(<11 x i16> addrspace(1)* %ptra, <11 x i16> addrspace(1)* %ptrb, <11 x i16> addrspace(1)* %ptr2) { +define void @add_v11i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v11i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -722,10 +722,10 @@ define void @add_v11i16(<11 x i16> addrspace(1)* %ptra, <11 x i16> addrspace(1)* ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <11 x i16>, <11 x i16> addrspace(1)* %ptra, align 4 - %b = load <11 x i16>, <11 x i16> addrspace(1)* %ptrb, align 4 + %a = load <11 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <11 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <11 x i16> %a, %b - store <11 x i16> %add, <11 x i16> addrspace(1)* %ptr2, align 4 + store <11 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -765,7 +765,7 @@ define <11 x i16> @add_v11i16_arg(<11 x i16> %a, <11 x i16> %b) { ret <11 x i16> %add } -define void @add_v12i16(<12 x i16> addrspace(1)* %ptra, <12 x i16> addrspace(1)* %ptrb, <12 x i16> addrspace(1)* %ptr2) { +define void @add_v12i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v12i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -823,10 +823,10 @@ define void @add_v12i16(<12 x i16> addrspace(1)* %ptra, <12 x i16> addrspace(1)* ; GFX9-NEXT: global_store_dwordx2 v[4:5], v[6:7], off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <12 x i16>, <12 x i16> addrspace(1)* %ptra, align 4 - %b = load <12 x i16>, <12 x i16> addrspace(1)* %ptrb, align 4 + %a = load <12 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <12 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <12 x i16> %a, %b - store <12 x i16> %add, <12 x i16> addrspace(1)* %ptr2, align 4 + store <12 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll index 05cbd6f9942bf..5c451bffc2278 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll @@ -8,6 +8,6 @@ ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) { %res = add i32 %arg1, %arg2 - store i32 %res, i32 addrspace(1)* undef + store i32 %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll index 031925033ec3c..ec5c224cb1b28 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s -declare hidden i32 addrspace(1)* 
@ext(i8 addrspace(1)*) +declare hidden ptr addrspace(1) @ext(ptr addrspace(1)) -define i32 addrspace(1)* @call_assert_align() { +define ptr addrspace(1) @call_assert_align() { ; CHECK-LABEL: call_assert_align: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -34,12 +34,12 @@ define i32 addrspace(1)* @call_assert_align() { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: - %call = call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null) - store volatile i32 0, i32 addrspace(1)* %call - ret i32 addrspace(1)* %call + %call = call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null) + store volatile i32 0, ptr addrspace(1) %call + ret ptr addrspace(1) %call } -define i32 addrspace(1)* @tail_call_assert_align() { +define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-LABEL: tail_call_assert_align: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,6 +50,6 @@ define i32 addrspace(1)* @tail_call_assert_align() { ; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12 ; CHECK-NEXT: s_setpc_b64 s[16:17] entry: - %call = tail call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null) - ret i32 addrspace(1)* %call + %call = tail call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null) + ret ptr addrspace(1) %call } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll index 1054bdcafd423..4618fc9fdce2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll @@ -8,8 +8,8 @@ ; GCN-NEXT: ds_read_u8 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) { - %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1 +define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) { + %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1 ret i8 %load } @@ -20,9 +20,9 @@ define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16 - %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1 +define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16 + %load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1 ret i8 %load } @@ -33,8 +33,8 @@ define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u16 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) { - %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2 +define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) { + %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2 ret i16 %load } @@ -45,9 +45,9 @@ define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16 - %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2 +define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr 
inbounds i16, ptr addrspace(3) %ptr, i16 16 + %load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2 ret i16 %load } @@ -58,8 +58,8 @@ define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) { - %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4 +define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) { + %load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4 ret i32 %load } @@ -70,9 +70,9 @@ define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16 - %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4 +define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16 + %load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4 ret i32 %load } @@ -83,8 +83,8 @@ define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) { - %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8 +define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) { + %load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8 ret i64 %load } @@ -95,9 +95,9 @@ define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16 - %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8 +define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16 + %load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8 ret i64 %load } @@ -108,9 +108,9 @@ define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) { - %gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16 - %load = load atomic float, float addrspace(3)* %gep monotonic, align 4 +define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16 + %load = load atomic float, ptr addrspace(3) %gep monotonic, align 4 ret float %load } @@ -121,9 +121,9 @@ define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) { - %gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16 - %load = load atomic double, double addrspace(3)* %gep monotonic, align 8 +define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16 + %load = load atomic double, ptr 
addrspace(3) %gep monotonic, align 8 ret double %load } @@ -134,10 +134,10 @@ define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16 - %load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8 - ret i8* %load +define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16 + %load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8 + ret ptr %load } ; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset: @@ -147,8 +147,8 @@ define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16 - %load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4 - ret i8 addrspace(3)* %load +define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16 + %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4 + ret ptr addrspace(3) %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll index 63187c24c6333..0492985215b48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll @@ -8,8 +8,8 @@ ; GCN-NEXT: ds_write_b8 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) { - store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1 +define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) { + store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1 ret void } @@ -20,9 +20,9 @@ define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) { ; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) { - %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16 - store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1 +define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) { + %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16 + store atomic i8 %val, ptr addrspace(3) %gep monotonic, align 1 ret void } @@ -33,8 +33,8 @@ define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) { ; GCN-NEXT: ds_write_b16 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) { - store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2 +define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) { + store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2 ret void } @@ -45,9 +45,9 @@ define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) { ; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; 
GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) { - %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16 - store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2 +define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val) { + %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16 + store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2 ret void } @@ -58,8 +58,8 @@ define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) ; GCN-NEXT: ds_write_b32 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) { - store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4 +define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) { + store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4 ret void } @@ -70,9 +70,9 @@ define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) { ; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16 - store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4 +define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val) { + %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16 + store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4 ret void } @@ -83,8 +83,8 @@ define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) ; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) { - store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8 +define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) { + store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8 ret void } @@ -95,9 +95,9 @@ define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) { ; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) { - %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16 - store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8 +define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) { + %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16 + store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll index cfc58aaae5be6..9fd265394e3c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll @@ -77,11 +77,11 @@ entry: br i1 %trunc, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable bb1: - store volatile i32 1, i32 addrspace(1)* undef + store volatile i32 1, ptr addrspace(1) undef unreachable } @@ -118,10 +118,10 @@ entry: br i1 %and, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable bb1: - store volatile i32 1, i32 addrspace(1)* 
undef + store volatile i32 1, ptr addrspace(1) undef unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll index 35932d42a0646..44c3c3d333267 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> addrspace(4)* %ptr) { +define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr) { ; GFX10-LABEL: value_finder_bug: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -12,10 +12,10 @@ define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> ; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %vec = load <4 x float>, <4 x float> addrspace(4)* %ptr, align 4 + %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4 %vec.3 = extractelement <4 x float> %vec, i32 3 %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1 - store <2 x float> %new_vec, <2 x float> addrspace(5)* %store_ptr, align 8 + store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index a607ccb6946a0..37592a7f99ee9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -7,7 +7,7 @@ ; tests are unified. 
declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) #0 -declare hidden void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32])) #0 +declare hidden void @external_void_func_byval(ptr addrspace(5) byval([16 x i32])) #0 define amdgpu_kernel void @kernel_caller_stack() { ; MUBUF-LABEL: kernel_caller_stack: @@ -173,6 +173,7 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: s_mov_b32 s2, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s40, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 @@ -187,7 +188,6 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128 -; FLATSCR-NEXT: s_mov_b32 s40, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8 ; FLATSCR-NEXT: s_mov_b32 s39, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16 @@ -226,9 +226,8 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: s_endpgm %alloca = alloca [16 x i32], align 4, addrspace(5) - %cast = bitcast [16 x i32] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %cast, i8 0, i32 128, i1 false) - call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %alloca) + call void @llvm.memset.p5.i32(ptr addrspace(5) align 4 %alloca, i8 0, i32 128, i1 false) + call void @external_void_func_byval(ptr addrspace(5) byval([16 x i32]) %alloca) ret void } @@ -302,7 +301,7 @@ define void @func_caller_stack() { ret void } -define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { +define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-LABEL: func_caller_byval: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -431,12 +430,11 @@ define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] - %cast = bitcast [16 x i32] addrspace(5)* %argptr to i8 addrspace(5)* - call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %argptr) + call void @external_void_func_byval(ptr addrspace(5) byval([16 x i32]) %argptr) ret void } -declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #1 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } attributes #1 = { argmemonly nofree nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index e39c3e9d3339a..cbbf9d17b3dc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,7 +128,7 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float 
%z) { ret float %b } -define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { +define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -204,13 +204,13 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrs ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul float %x, %y - %vec = load <2 x float>, <2 x float> addrspace(1)* %vec_ptr + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr %z = extractelement <2 x float> %vec, i64 1 %b = fadd float %a, %z ret float %b } -define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { +define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -286,7 +286,7 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, <2 x float> a ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul float %x, %y - %vec = load <2 x float>, <2 x float> addrspace(1)* %vec_ptr + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr %z = extractelement <2 x float> %vec, i64 1 %b = fadd float %z, %a ret float %b diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll index b337a37dae99f..356ef52bf21b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll @@ -3,6 +3,6 @@ define amdgpu_kernel void @test_long_add4() { entry: %add = add <4 x i64> zeroinitializer, zeroinitializer - store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32 + store <4 x i64> %add, ptr addrspace(1) null, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 827574baafa79..9247eca4b6754 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -20,6 +20,6 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-NEXT: s_endpgm entry: %alloca = alloca i64, align 4, addrspace(5) - store volatile i64 0, i64 addrspace(5)* %alloca, align 4 + store volatile i64 0, ptr addrspace(5) %alloca, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index c8eb1a4acc351..58a1adaafd0b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -96,7 +96,7 @@ define float @v_uitofp_to_f32_multi_use_lshr8_mask255(i32 %arg0) nounwind { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 - store i32 %lshr.8, i32 addrspace(1)* undef + store i32 %lshr.8, ptr addrspace(1) undef %masked = and i32 %lshr.8, 255 %cvt = uitofp i32 %masked to float ret float %cvt @@ -449,7 +449,7 @@ define double @v_uitofp_i8_to_f64(i8 %arg0) nounwind { ret double %cvt } -define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_i8_to_f32: ; SI: ; %bb.0: ; SI-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -483,14 +483,14 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %cvt = uitofp i8 %load to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v2i8_to_v2f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -529,14 +529,14 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid - %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2 + %gep = getelementptr <2 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <2 x i8>, ptr addrspace(1) %gep, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> - store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 + store <2 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v3i8_to_v3f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -579,14 +579,14 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid - %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <3 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <3 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> - store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 + store <3 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -630,10 +630,10 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) 
%out, align 16 ret void } @@ -641,7 +641,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; position in the word for the component. ; FIXME: Packing bytes -define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_unaligned: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -715,14 +715,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out2, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd @@ -808,16 +808,16 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() - %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 + %in.ptr = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid.x + %load = load <4 x i8>, ptr addrspace(1) %in.ptr, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 %add = add <4 x i8> %load, ; Second use of %load - store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4 + store <4 x i8> %add, ptr addrspace(1) %out2, align 4 ret void } -define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v7i8_to_v7f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -907,14 +907,14 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx3 v[9:10], v[4:6] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid - %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <7 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <7 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <7 x i8> %load to <7 x float> - store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 + store <7 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x 
i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v8i8_to_v8f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -975,14 +975,14 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx4 v[10:11], v[4:7] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid - %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8 + %gep = getelementptr <8 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <8 x i8>, ptr addrspace(1) %gep, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> - store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 + store <8 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1020,16 +1020,16 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %add = add i32 %load, 2 %inreg = and i32 %add, 255 %cvt = uitofp i32 %inreg to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_hi1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1065,18 +1065,18 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %inreg = and i32 %load, 65280 %shr = lshr i32 %inreg, 8 %cvt = uitofp i32 %shr to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } ; We don't get these ones because of the zext, but instcombine removes ; them so it shouldn't really matter. 
-define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1110,15 +1110,15 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %ext = zext i8 %load to i32 %cvt = uitofp i32 %ext to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v4i8_zext_v4i32_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1192,15 +1192,15 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %ext = zext <4 x i8> %load to <4 x i32> %cvt = uitofp <4 x i32> %ext to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte0_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1235,15 +1235,15 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %and = and i32 %val, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1279,16 +1279,16 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = 
getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 8 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte2_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1324,16 +1324,16 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 16 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte3_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1368,16 +1368,16 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 24 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) { +define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; SI-LABEL: cvt_ubyte0_or_multiuse: ; SI: ; %bb.0: ; %bb ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1418,14 +1418,14 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float a ; VI-NEXT: s_endpgm bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %lid - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %lid + %load = load i32, ptr addrspace(1) %gep %or = or i32 %load, -2147483647 %and = and i32 %or, 255 %uitofp = uitofp i32 %and to float %cast = bitcast i32 %or to float %add = fadd float %cast, %uitofp - store float %add, float addrspace(1)* %out + store float %add, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index af883e8d6a91c..25e2267fdee89 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -23,7 +23,7 @@ entry: br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load 
volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -55,7 +55,7 @@ endif: ret i32 %v if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif } @@ -81,7 +81,7 @@ entry: br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -90,7 +90,7 @@ endif: } ; Make sure and 1 is inserted on llvm.amdgcn.if -define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) { +define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) { ; CHECK-LABEL: divergent_if_nonboolean_condition1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -109,12 +109,12 @@ define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: - %value = load i32, i32 addrspace(1)* %ptr + %value = load i32, ptr addrspace(1) %ptr %c = trunc i32 %value to i1 br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -123,7 +123,7 @@ endif: } @external_constant = external addrspace(4) constant i32, align 4 -@const.ptr = external addrspace(4) constant float*, align 4 +@const.ptr = external addrspace(4) constant ptr, align 4 ; Make sure this case compiles. G_ICMP was mis-mapped due to having ; the result register class constrained by llvm.amdgcn.if lowering. @@ -167,13 +167,13 @@ define void @constrained_if_register_class() { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: - %tmp = load i32, i32 addrspace(4)* @external_constant + %tmp = load i32, ptr addrspace(4) @external_constant %tmp1 = icmp ne i32 %tmp, 0 br i1 %tmp1, label %bb12, label %bb2 bb2: - %ptr = load float*, float* addrspace(4)* @const.ptr - %tmp4 = load float, float* %ptr, align 4 + %ptr = load ptr, ptr addrspace(4) @const.ptr + %tmp4 = load float, ptr %ptr, align 4 %tmp5 = fcmp olt float %tmp4, 1.0 %tmp6 = or i1 %tmp5, false br i1 %tmp6, label %bb8, label %bb7 @@ -187,7 +187,7 @@ bb8: br i1 %tmp10, label %bb11, label %bb12 bb11: - store float 4.0, float addrspace(5)* undef, align 4 + store float 4.0, ptr addrspace(5) undef, align 4 br label %bb12 bb12: @@ -235,7 +235,7 @@ bb1: br i1 %cmp0, label %bb4, label %bb9 bb4: - %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %load = load volatile i32, ptr addrspace(1) undef, align 4 %cmp1 = icmp slt i32 %tmp, %load br i1 %cmp1, label %bb1, label %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index 649d9a4d5966d..bb6f809df53f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -55,7 +55,7 @@ entry: ret void } -define void @returnaddress_debug_loc(i8* addrspace(1)* %ptr) { +define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) { ; CHECK-LABEL: name: returnaddress_debug_loc ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 @@ -68,12 +68,12 @@ define void @returnaddress_debug_loc(i8* addrspace(1)* %ptr) { ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: - %returnaddr = call i8* @llvm.returnaddress(i32 0), !dbg 
!6 - store i8* %returnaddr, i8* addrspace(1)* %ptr, align 8 + %returnaddr = call ptr @llvm.returnaddress(i32 0), !dbg !6 + store ptr %returnaddr, ptr addrspace(1) %ptr, align 8 ret void } -declare i8* @llvm.returnaddress(i32 immarg) #0 +declare ptr @llvm.returnaddress(i32 immarg) #0 attributes #0 = { nofree nosync nounwind readnone willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll index 256b95d222bea..5dae7885f6bfb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll @@ -2,24 +2,24 @@ ; ERR: remark: :0:0: cannot select: %{{[0-9]+}}:sreg_32(p5) = G_DYN_STACKALLOC %{{[0-9]+}}:vgpr(s32), 1 (in function: kernel_dynamic_stackalloc_vgpr_align4) ; ERR-NEXT: warning: Instruction selection used fallback path for kernel_dynamic_stackalloc_vgpr_align4 -; ERR-NEXT: error: :0:0: in function kernel_dynamic_stackalloc_vgpr_align4 void (i32 addrspace(1)*): unsupported dynamic alloca +; ERR-NEXT: error: :0:0: in function kernel_dynamic_stackalloc_vgpr_align4 void (ptr addrspace(1)): unsupported dynamic alloca ; ERR: remark: :0:0: cannot select: %{{[0-9]+}}:sreg_32(p5) = G_DYN_STACKALLOC %{{[0-9]+}}:vgpr(s32), 1 (in function: func_dynamic_stackalloc_vgpr_align4) ; ERR-NEXT: warning: Instruction selection used fallback path for func_dynamic_stackalloc_vgpr_align4 ; ERR-NEXT: error: :0:0: in function func_dynamic_stackalloc_vgpr_align4 void (i32): unsupported dynamic alloca -define amdgpu_kernel void @kernel_dynamic_stackalloc_vgpr_align4(i32 addrspace(1)* %ptr) { +define amdgpu_kernel void @kernel_dynamic_stackalloc_vgpr_align4(ptr addrspace(1) %ptr) { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %n = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %n = load i32, ptr addrspace(1) %gep %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(1)* undef + store volatile ptr addrspace(5) %alloca, ptr addrspace(1) undef ret void } define void @func_dynamic_stackalloc_vgpr_align4(i32 %n) { %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(1)* undef + store volatile ptr addrspace(5) %alloca, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll index 50ab759f01279..2e3b716750042 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -139,9 +139,9 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv, align 4 + %n = load i32, ptr addrspace(4) @gv, align 4 %alloca = alloca i32, i32 %n, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -197,7 +197,7 @@ 
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 16, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -279,9 +279,9 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv, align 16 + %n = load i32, ptr addrspace(4) @gv, align 16 %alloca = alloca i32, i32 %n, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -340,11 +340,11 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 32, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } -define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { +define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -428,8 +428,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv + %n = load i32, ptr addrspace(4) @gv %alloca = alloca i32, i32 %n, align 32, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 1e4438526779a..7983cba3478db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -4,7 +4,7 @@ ; Check lowering of some large extractelement that use the stack ; instead of register indexing. 
-define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { +define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v64i32_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -174,12 +174,12 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 %idx ret i32 %elt } -define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v128i16_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -353,12 +353,12 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <128 x i16>, <128 x i16> addrspace(1)* %ptr + %vec = load <128 x i16>, ptr addrspace(1) %ptr %elt = extractelement <128 x i16> %vec, i32 %idx ret i16 %elt } -define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { +define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v32i64_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -529,7 +529,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <32 x i64>, <32 x i64> addrspace(1)* %ptr + %vec = load <32 x i64>, ptr addrspace(1) %ptr %elt = extractelement <32 x i64> %vec, i32 %idx ret i64 %elt } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll index 093b40114d5ff..5563c5fb41dc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 @@ -32,12 +32,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GFX11-NEXT: s_movrels_b64 s[2:3], s[10:11] ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off @@ 
-149,12 +149,12 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define i128 @extractelement_vgpr_v4i128_vgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 %idx) { +define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i128_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -452,12 +452,12 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(<4 x i128> addrspace(1)* %ptr, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, v15, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -786,12 +786,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 ; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -809,12 +809,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_load_b512 s[0:15], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 0 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -844,12 +844,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s6 ; GFX11-NEXT: s_mov_b32 s3, s7 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 1 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 
0x0 @@ -879,12 +879,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s10 ; GFX11-NEXT: s_mov_b32 s3, s11 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 2 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -914,12 +914,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s14 ; GFX11-NEXT: s_mov_b32 s3, s15 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx0(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -959,12 +959,12 @@ define i128 @extractelement_vgpr_v4i128_idx0(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 0 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1006,12 +1006,12 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 1 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1053,12 +1053,12 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 2 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1100,7 +1100,7 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48 ; GFX11-NEXT: 
s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll index 7b0162aa7d3d4..c0191db08bbff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -43,12 +43,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -115,12 +115,12 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -188,12 +188,12 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -238,12 +238,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = 
load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -261,12 +261,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 0 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -287,12 +287,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 1 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -313,12 +313,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 2 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -339,12 +339,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 3 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -381,12 +381,12 @@ define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 0 ret i16 %element } 
-define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -428,12 +428,12 @@ define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 1 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -475,12 +475,12 @@ define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 2 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -522,12 +522,12 @@ define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 3 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -577,12 +577,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* ; GFX11-NEXT: s_lshl_b32 s1, s1, 4 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -673,12 +673,12 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) 
%ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -770,12 +770,12 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -836,12 +836,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -859,12 +859,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 0 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -885,12 +885,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 1 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -911,12 +911,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 2 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx3: ; GCN: ; 
%bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -937,12 +937,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 3 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -963,12 +963,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 4 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx5: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -989,12 +989,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s2, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 5 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx6: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1015,12 +1015,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s3 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 6 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx7: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1041,12 +1041,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s3, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 7 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1086,12 +1086,12 @@ define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: 
global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 0 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1136,12 +1136,12 @@ define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 1 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1186,12 +1186,12 @@ define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 2 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1236,12 +1236,12 @@ define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 3 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1286,12 +1286,12 @@ define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 4 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1336,12 +1336,12 @@ define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 5 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx6(<8 x 
i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1386,12 +1386,12 @@ define i16 @extractelement_vgpr_v8i16_idx6(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 6 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1436,7 +1436,7 @@ define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 7 ret i16 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll index 60be5f2e5e30c..e93ff0dc92ceb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -33,12 +33,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* in ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v0, v[0:1], off @@ -92,12 +92,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -153,12 +153,12 @@ define 
i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -210,12 +210,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -233,12 +233,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 0 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -259,12 +259,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 1 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -285,12 +285,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 2 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -311,12 +311,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 24 ; GFX11-NEXT: ; return to shader part 
epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -356,12 +356,12 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -406,12 +406,12 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -456,12 +456,12 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -506,12 +506,12 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 3 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -549,12 +549,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define 
amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -624,12 +624,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -700,12 +700,12 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -750,12 +750,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -773,12 +773,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 0 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -799,12 +799,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 1 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) { ; 
GCN-LABEL: extractelement_sgpr_v8i8_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -825,12 +825,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 2 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -851,12 +851,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 24 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 3 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -877,12 +877,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 4 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx5: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -903,12 +903,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 5 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx6: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -929,12 +929,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 6 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx7: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -955,12 +955,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg ; 
GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 24 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 7 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1000,12 +1000,12 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1050,12 +1050,12 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1100,12 +1100,12 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1150,12 +1150,12 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1200,12 +1200,12 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 4 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { +define i8 
@extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1250,12 +1250,12 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 5 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1300,12 +1300,12 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 6 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1350,12 +1350,12 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 7 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1405,12 +1405,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* ; GFX11-NEXT: s_lshl_b32 s1, s1, 3 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vector = load <16 x i8>, ptr addrspace(4) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -1501,12 +1501,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -1598,12 +1598,12 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1664,12 +1664,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vector = load <16 x i8>, ptr addrspace(4) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1709,12 +1709,12 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1759,12 +1759,12 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1809,12 +1809,12 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1859,12 +1859,12 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 
s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1909,12 +1909,12 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 4 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1959,12 +1959,12 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 5 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2009,12 +2009,12 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 6 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2059,12 +2059,12 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 7 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2109,12 +2109,12 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 8 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) { ; GFX9-LABEL: 
extractelement_vgpr_v16i8_idx9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2159,12 +2159,12 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 9 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2209,12 +2209,12 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 10 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2259,12 +2259,12 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 11 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx12: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2309,12 +2309,12 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 12 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx13: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2359,12 +2359,12 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 13 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx14: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2409,12 +2409,12 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector 
= load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 14 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2459,7 +2459,7 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 15 ret i8 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 79ca174983420..e5c1d645d2cb9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -500,7 +500,7 @@ define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> , i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -702,7 +702,7 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -821,7 +821,7 @@ define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -927,7 +927,7 @@ define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %se ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -1586,7 +1586,7 @@ entry: ret double %ext } -define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { +define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p3_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1626,11 +1626,11 @@ define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 % ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: - %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx - ret i8 addrspace(3)* %ext + %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx + ret ptr addrspace(3) %ext } -define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 @@ -1700,12 +1700,12 @@ define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i ; GFX11-NEXT: ds_store_b32 v0, v0 ; GFX11-NEXT: s_endpgm entry: - %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx - store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef + %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx + 
store ptr addrspace(3) %ext, ptr addrspace(3) undef ret void } -define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { +define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p1_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1779,11 +1779,11 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 % ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: - %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx - ret i8 addrspace(1)* %ext + %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx + ret ptr addrspace(1) %ext } -define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1884,8 +1884,8 @@ define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm entry: - %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx - store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef + %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx + store ptr addrspace(1) %ext, ptr addrspace(1) undef ret void } @@ -3036,7 +3036,7 @@ entry: ret double %ext } -define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v5f64_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -3405,7 +3405,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <5 x double> , i32 %sel - store double %ext, double addrspace(1)* %out + store double %ext, ptr addrspace(1) %out ret void } @@ -3957,7 +3957,7 @@ entry: ret float %ext } -define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -4299,11 +4299,11 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x float> , i32 %sel - store float %ext, float addrspace(1)* %out + store float %ext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -4656,11 +4656,11 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x double> , i32 %sel - store double %ext, double addrspace(1)* %out + store double %ext, ptr addrspace(1) %out ret void } -define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_7: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4696,12 +4696,12 @@ define 
i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 7 ret i32 %elt } -define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_32: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4736,12 +4736,12 @@ define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 32 ret i32 %elt } -define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_33: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4780,12 +4780,12 @@ define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 33 ret i32 %elt } -define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_37: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4821,7 +4821,7 @@ define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 37 ret i32 %elt } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index d71b58f484a42..3f93141252730 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float %data) { +define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) { ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -23,11 +23,11 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr 
%ptr, float %data) ret void } -define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %data) { +define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) { ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -50,11 +50,11 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %d ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret float %ret } -define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float %data) #0 { +define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 { ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -75,11 +75,11 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %data) #0 { +define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 { ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -102,10 +102,10 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %d ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret float %ret } -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float*, float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index 4ae1c5647e690..620b1da346762 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void 
@flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, double %data) { +define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -15,11 +15,11 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, doubl ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret void } -define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double %data) { +define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -38,11 +38,11 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, double %data) #0 { +define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -55,11 +55,11 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, doubl ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double %data) #0 { +define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -78,10 +78,10 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double* %ptr, 
double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret double %ret } -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double*, double) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index 94c8fd34926e5..d50ce5e7ecda3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s -define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -12,11 +12,11 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret void } -define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -28,8 +28,8 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %p ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret } -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>*, <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr, <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll index 8bc4111abe1a5..0edc177a8be5d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll @@ -38,8 +38,8 @@ define amdgpu_ps void @amdgpu_ps() { ; PAL-NEXT: s_waitcnt vmcnt(0) ; PAL-NEXT: s_endpgm %alloca = alloca i32, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %alloca to i32* - store volatile i32 0, i32* %cast + %cast = 
addrspacecast ptr addrspace(5) %alloca to ptr + store volatile i32 0, ptr %cast ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll index 5914521cf4189..a8aa6c780b86a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll @@ -22,8 +22,8 @@ ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { %alloca = alloca i32, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %alloca to i32* - store volatile i32 0, i32* %cast + %cast = addrspacecast ptr addrspace(5) %alloca to ptr + store volatile i32 0, ptr %cast ret void } @@ -47,7 +47,7 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0 define amdgpu_kernel void @stack_object_in_kernel_no_calls() { %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 7d0029e9efa56..1369b02d87fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -77,14 +77,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX11-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -148,16 +145,13 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX11-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -221,18 +215,15 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] 
addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } -define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { +define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: private_ptr_foo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -266,8 +257,8 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:4 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1 - store float 1.000000e+01, float addrspace(5)* %gep, align 4 + %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1 + store float 1.000000e+01, ptr addrspace(5) %gep, align 4 ret void } @@ -353,16 +344,13 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -436,18 +424,15 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] 
addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -523,16 +508,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -622,16 +604,13 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -709,18 +688,15 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = 
getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -801,16 +777,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -876,12 +849,12 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX11-NEXT: s_endpgm bb: %i = alloca [4096 x i32], align 4, addrspace(5) - %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef - store volatile i32 13, i32 addrspace(5)* %i1, align 4 - %i7 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - store volatile i32 15, i32 addrspace(5)* %i7, align 4 - %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef + store volatile i32 13, ptr addrspace(5) %i1, align 4 + %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + store volatile i32 15, ptr addrspace(5) %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 
ret void } @@ -948,12 +921,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [4096 x i32], align 4, addrspace(5) - %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef - store volatile i32 13, i32 addrspace(5)* %i1, align 4 - %i7 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - store volatile i32 15, i32 addrspace(5)* %i7, align 4 - %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef + store volatile i32 13, ptr addrspace(5) %i1, align 4 + %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + store volatile i32 15, ptr addrspace(5) %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -1018,13 +991,13 @@ bb: %vidx = tail call i32 @llvm.amdgcn.workitem.id.x() %add1 = add nsw i32 %sidx, %vidx %add2 = add nsw i32 %add1, 256 - %gep = getelementptr inbounds [32 x i32], [32 x i32] addrspace(5)* %alloca, i32 0, i32 %add2 - store volatile i32 15, i32 addrspace(5)* %gep, align 4 - %load = load volatile i32, i32 addrspace(5)* %gep, align 4 + %gep = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %add2 + store volatile i32 15, ptr addrspace(5) %gep, align 4 + %load = load volatile i32, ptr addrspace(5) %gep, align 4 ret void } -define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) { +define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_i64_aligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1070,12 +1043,12 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile i64 15, i64 addrspace(5)* %arg, align 8 - %load = load volatile i64, i64 addrspace(5)* %arg, align 8 + store volatile i64 15, ptr addrspace(5) %arg, align 8 + %load = load volatile i64, ptr addrspace(5) %arg, align 8 ret void } -define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) { +define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_i64_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1121,12 +1094,12 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile i64 15, i64 addrspace(5)* %arg, align 1 - %load = load volatile i64, i64 addrspace(5)* %arg, align 1 + store volatile i64 15, ptr addrspace(5) %arg, align 1 + %load = load volatile i64, ptr addrspace(5) %arg, align 1 ret void } -define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg) { +define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_v3i32_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1188,12 +1161,12 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg) ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile <3 x i32> , <3 x i32> addrspace(5)* %arg, align 1 - %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, 
align 1 + store volatile <3 x i32> , ptr addrspace(5) %arg, align 1 + %load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1 ret void } -define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg) { +define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_v4i32_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1260,8 +1233,8 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg) ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile <4 x i32> , <4 x i32> addrspace(5)* %arg, align 1 - %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1 + store volatile <4 x i32> , ptr addrspace(5) %arg, align 1 + %load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll index 6ac4dc886db07..3d5f908c8667f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll @@ -229,7 +229,7 @@ define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) { %cmp = fcmp ogt float %a, %b %val0 = select i1 %cmp, float %a, float %b %val1 = zext i1 %cmp to i32 - store i32 %val1, i32 addrspace(3)* undef + store i32 %val1, ptr addrspace(3) undef ret float %val0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index 92961ab1c4dda..0f207945062a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -111,23 +111,23 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %tmp0 = call float 
@llvm.minnum.f32(float %a.fneg, float %b) %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -256,23 +256,23 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -383,13 +383,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) 
%cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %b.fabs = call float @llvm.fabs.f32(float %b) @@ -401,11 +401,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -521,13 +521,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fabs = call float @llvm.fabs.f32(float %a) %a.fabs.fneg = fsub float -0.0, %a.fabs @@ -541,11 +541,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_nnan_inputs_med3_f32_pat0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -660,13 +660,13 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile 
float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.nnan = fadd nnan float %a, 1.0 %b.nnan = fadd nnan float %b, 2.0 @@ -676,7 +676,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } @@ -685,7 +685,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; Negative patterns ; --------------------------------------------------------------------- -define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_test_safe_med3_f32_pat0_multi_use0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -825,19 +825,19 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace( ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - store volatile float %tmp0, float addrspace(1)* undef + store volatile float %tmp0, ptr addrspace(1) undef %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll index 23b7c9d129aaf..10e6b3f1b47b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll @@ -212,7 +212,7 @@ define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) { %cmp = fcmp 
ule float %a, %b %val0 = select i1 %cmp, float %a, float %b %val1 = zext i1 %cmp to i32 - store i32 %val1, i32 addrspace(3)* undef + store i32 %val1, ptr addrspace(3) undef ret float %val0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll index 1aaf515cc1257..31d35df6d0990 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs | FileCheck %s -check-prefix=GFX940 -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ; bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. -declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) -declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) -declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3) * %ptr, <2 x half> %data, i32, i32, i1) -declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3) * %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) +declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1) +declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) -define amdgpu_kernel void @flat_atomic_fadd_f32_noret(float* %ptr, float %data) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -20,11 +20,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret(float* %ptr, float %data) ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(float* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -49,11 +49,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(float* %ptr) { ; GFX940-NEXT: s_cbranch_execnz .LBB1_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(float* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -78,22 +78,22 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(float* 
%ptr) #0 { ; GFX940-NEXT: s_cbranch_execnz .LBB2_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define float @flat_atomic_fadd_f32_rtn(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret float %ret } -define float @flat_atomic_fadd_f32_rtn_pat(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -117,11 +117,11 @@ define float @flat_atomic_fadd_f32_rtn_pat(float* %ptr, float %data) { ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX940-NEXT: v_mov_b32_e32 v0, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret float %ret } -define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -131,22 +131,22 @@ define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(<2 x half>* %ptr, <2 x h ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret void } -define <2 x half> @flat_atomic_fadd_v2f16_rtn(<2 x half>* %ptr, <2 x half> %data) { +define <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(<2 x i16>* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -156,22 +156,22 @@ define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(<2 x i16>* %ptr, <2 x i ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret void } -define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(<2 x i16>* 
%ptr, <2 x i16> %data) { +define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -181,22 +181,22 @@ define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(1 ; GFX940-NEXT: v_mov_b32_e32 v0, s4 ; GFX940-NEXT: global_atomic_pk_add_bf16 v1, v0, s[2:3] ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @global_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define <2 x i16> @global_atomic_fadd_v2bf16_rtn(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -205,22 +205,22 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(<2 x half> addrspace(3) ; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: ds_pk_add_f16 v0, v1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret void } -define <2 x half> @local_atomic_fadd_v2f16_rtn(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret <2 x half> %ret } -define amdgpu_kernel void 
@local_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -233,11 +233,11 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(3) ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @local_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,7 +247,7 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(3)* %ptr, <2 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret <2 x i16> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index 517fb872752a2..7db9d3769a743 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -7,13 +7,13 @@ declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32 declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1) +declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1) define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { ; 
GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: @@ -40,11 +40,11 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -61,7 +61,7 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -90,11 +90,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -111,7 +111,7 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -140,11 +140,11 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -161,7 +161,7 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -190,11 +190,11 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> 
inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -211,7 +211,7 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -240,11 +240,11 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -261,7 +261,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -290,11 +290,11 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -311,11 +311,11 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double 
%data) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -325,11 +325,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -339,11 +339,11 @@ define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -353,11 +353,11 @@ define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -384,11 +384,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -401,11 +401,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspa ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 
{ ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -432,11 +432,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -449,11 +449,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspa ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fadd_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -461,11 +461,11 @@ define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -492,11 +492,11 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -508,11 +508,11 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, d ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; 
%main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -539,11 +539,11 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -551,11 +551,11 @@ define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -563,11 +563,11 @@ define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double addrspace(1)* %ptr) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -592,11 +592,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -623,11 +623,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -641,11 +641,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 
; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -673,11 +673,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) # ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -704,11 +704,11 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -720,11 +720,11 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -752,11 +752,11 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -766,11 +766,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fadd_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -778,11 +778,11 @@ define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -807,11 +807,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -821,11 +821,11 @@ define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -833,11 +833,11 @@ define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -847,11 +847,11 @@ define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -859,11 +859,11 @@ define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double 
@llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr, double %data) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -874,11 +874,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret void } -define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data) { +define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -888,11 +888,11 @@ define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -905,11 +905,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -922,11 +922,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspac ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) #4 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x24 @@ -950,11 +950,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define double 
@local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) #1 { +define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -965,11 +965,11 @@ define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double % ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 { +define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -979,11 +979,11 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 { +define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -993,7 +993,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, do ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll index 1eaae2cfff9d6..7197dc663eded 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck --check-prefix=CI %s ; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s -define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -56,15 +56,15 @@ define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 
%r2 = frem half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -102,15 +102,15 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 %r2 = frem fast half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #1 { +define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 { ; CI-LABEL: unsafe_frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -148,15 +148,15 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 %r2 = frem half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -214,15 +214,15 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -258,15 +258,15 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa ; VI-NEXT: v_mov_b32_e32 v1, 
s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem fast float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { +define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 { ; CI-LABEL: unsafe_frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -302,15 +302,15 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -366,14 +366,14 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -423,14 +423,14 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem fast double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CI-LABEL: unsafe_frem_f64: ; CI: ; %bb.0: ; 
CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -480,15 +480,15 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - double addrspace(1)* %in2) #1 { - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + ptr addrspace(1) %in2) #1 { + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1, <2 x half> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -580,15 +580,15 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x half>, <2 x half> addrspace(1)* %in2, i32 4 - %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1, align 8 - %r1 = load <2 x half>, <2 x half> addrspace(1)* %gep2, align 8 + %gep2 = getelementptr <2 x half>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x half>, ptr addrspace(1) %in1, align 8 + %r1 = load <2 x half>, ptr addrspace(1) %gep2, align 8 %r2 = frem <2 x half> %r0, %r1 - store <2 x half> %r2, <2 x half> addrspace(1)* %out, align 8 + store <2 x half> %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in1, <4 x half> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v4f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -744,15 +744,15 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <4 x half>, <4 x half> addrspace(1)* %in2, i32 4 - %r0 = load <4 x half>, <4 x half> addrspace(1)* %in1, align 16 - %r1 = load <4 x half>, <4 x half> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <4 x half>, ptr addrspace(1) %in2, i32 4 + %r0 = load <4 x half>, ptr addrspace(1) %in1, align 16 + %r1 = load <4 x half>, ptr addrspace(1) %gep2, align 16 %r2 = frem <4 x half> %r0, %r1 - store <4 x half> %r2, <4 x half> addrspace(1)* %out, align 16 + store <4 x half> %r2, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, <2 x float> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -842,15 +842,15 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4 - %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8 - %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, 
align 8 + %gep2 = getelementptr <2 x float>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x float>, ptr addrspace(1) %in1, align 8 + %r1 = load <2 x float>, ptr addrspace(1) %gep2, align 8 %r2 = frem <2 x float> %r0, %r1 - store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8 + store <2 x float> %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, <4 x float> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v4f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1004,15 +1004,15 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4 - %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16 - %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <4 x float>, ptr addrspace(1) %in2, i32 4 + %r0 = load <4 x float>, ptr addrspace(1) %in1, align 16 + %r1 = load <4 x float>, ptr addrspace(1) %gep2, align 16 %r2 = frem <4 x float> %r0, %r1 - store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %r2, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, <2 x double> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1098,11 +1098,11 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4 - %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16 - %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <2 x double>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x double>, ptr addrspace(1) %in1, align 16 + %r1 = load <2 x double>, ptr addrspace(1) %gep2, align 16 %r2 = frem <2 x double> %r0, %r1 - store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %r2, ptr addrspace(1) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 75b2d1969673b..9de3335b3075c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -7,11 +7,11 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -19,11 +19,11 @@ define zeroext i1 
@i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -31,11 +31,11 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -43,11 +43,11 @@ define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -55,11 +55,11 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -67,11 +67,11 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -79,11 +79,11 @@ define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) 
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -91,11 +91,11 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -103,11 +103,11 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -115,11 +115,11 @@ define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -127,11 +127,11 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -139,11 +139,11 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 
= COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -151,11 +151,11 @@ define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load half, half addrspace(1)* undef + %val = load half, ptr addrspace(1) undef ret half %val } @@ -163,11 +163,11 @@ define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -175,11 +175,11 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -187,11 +187,11 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -199,14 +199,14 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i24>, <2 x i24> addrspace(1)* undef + %val = load <2 x i24>, ptr addrspace(1) undef ret <2 x i24> %val } @@ -214,7 +214,7 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) @@ -223,7 +223,7 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i24>, <3 x i24> addrspace(1)* undef + %val = load <3 x i24>, ptr addrspace(1) undef ret <3 x i24> %val } @@ -231,10 +231,10 @@ define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i32, i32 addrspace(1)* undef + %val = load i32, ptr addrspace(1) undef ret i32 %val } @@ -242,13 +242,13 @@ define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -256,13 +256,13 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -270,13 +270,13 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -284,12 +284,12 @@ define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i64, i64 addrspace(1)* undef + %val = load i64, ptr addrspace(1) undef ret i64 %val } @@ -297,14 +297,14 @@ define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = load i65, ptr addrspace(1) undef ret i65 %val } @@ -312,14 +312,14 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = 
load i65, ptr addrspace(1) undef ret i65 %val } @@ -327,14 +327,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = load i65, ptr addrspace(1) undef ret i65 %val } @@ -342,10 +342,10 @@ define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load float, float addrspace(1)* undef + %val = load float, ptr addrspace(1) undef ret float %val } @@ -353,12 +353,12 @@ define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load double, double addrspace(1)* undef + %val = load double, ptr addrspace(1) undef ret double %val } @@ -366,14 +366,14 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <2 x double>, <2 x double> addrspace(1)* undef + %val = load <2 x double>, ptr addrspace(1) undef ret <2 x double> %val } @@ -381,12 +381,12 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef ret <2 x i32> %val } @@ -394,13 +394,13 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i32>, <3 x i32> addrspace(1)* undef + %val = load <3 x i32>, ptr addrspace(1) undef ret <3 x i32> %val } @@ -408,14 +408,14 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef ret <4 x i32> %val } @@ -423,7 +423,7 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -431,7 +431,7 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 - %val = load volatile <5 x i32>, <5 x i32> 
addrspace(1)* undef + %val = load volatile <5 x i32>, ptr addrspace(1) undef ret <5 x i32> %val } @@ -439,7 +439,7 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -451,8 +451,8 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr ret <8 x i32> %val } @@ -460,7 +460,7 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -480,8 +480,8 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr ret <16 x i32> %val } @@ -489,7 +489,7 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -525,8 +525,8 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr ret <32 x i32> %val } @@ -534,14 +534,14 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <2 x i64>, <2 x i64> addrspace(1)* undef + %val = load <2 x i64>, ptr addrspace(1) undef ret <2 x i64> %val } @@ -549,7 +549,7 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant 
load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -559,8 +559,8 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef - %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <3 x i64>, ptr addrspace(1) %ptr ret <3 x i64> %val } @@ -568,7 +568,7 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -580,8 +580,8 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef - %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i64>, ptr addrspace(1) %ptr ret <4 x i64> %val } @@ -589,7 +589,7 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -603,8 +603,8 @@ define <5 x i64> 
@v5i64_func_void() #0 { ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 - %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef - %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i64>, ptr addrspace(1) %ptr ret <5 x i64> %val } @@ -612,7 +612,7 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -632,8 +632,8 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef - %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i64>, ptr addrspace(1) %ptr ret <8 x i64> %val } @@ -641,7 +641,7 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -677,8 +677,8 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef - %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i64>, ptr addrspace(1) %ptr ret <16 x i64> %val } @@ -686,10 +686,10 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef ret <2 x i16> %val } @@ -697,10 +697,10 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load <2 x half>, <2 x half> addrspace(1)* undef + %val = load <2 x half>, ptr addrspace(1) undef ret <2 x half> %val } @@ -708,7 +708,7 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) @@ -716,7 +716,7 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; 
CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef ret <3 x i16> %val } @@ -724,12 +724,12 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef ret <4 x i16> %val } @@ -737,12 +737,12 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <4 x half>, <4 x half> addrspace(1)* undef + %val = load <4 x half>, ptr addrspace(1) undef ret <4 x half> %val } @@ -750,7 +750,7 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -760,8 +760,8 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef - %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i16>, ptr addrspace(1) %ptr ret <5 x i16> %val } @@ -769,7 +769,7 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i16> addrspace(1)* 
addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -777,8 +777,8 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef - %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i16>, ptr addrspace(1) %ptr ret <8 x i16> %val } @@ -786,7 +786,7 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -798,8 +798,8 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef - %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i16>, ptr addrspace(1) %ptr ret <16 x i16> %val } @@ -807,7 +807,7 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -859,8 +859,8 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr ret <16 x i8> %val } @@ -868,7 +868,7 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -877,7 +877,7 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i8>, <2 x i8> addrspace(1)* undef + %val = load <2 x i8>, ptr addrspace(1) undef ret <2 x i8> %val } @@ -885,7 +885,7 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -897,7 +897,7 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i8>, <3 x i8> addrspace(1)* undef + %val = load <3 x i8>, ptr addrspace(1) undef ret <3 x i8> %val } @@ -905,7 +905,7 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -921,8 +921,8 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef - %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i8>, ptr addrspace(1) %ptr ret <4 x i8> %val } @@ -930,39 +930,38 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef + %val = load { i8, i32 }, ptr addrspace(1) undef ret { i8, i32 } %val } -define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { +define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN - %val0 = load volatile i8, i8 addrspace(1)* undef - %val1 = load volatile i32, i32 addrspace(1)* undef - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 - store i8 %val0, i8 addrspace(5)* %gep0 - store i32 %val1, i32 addrspace(5)* %gep1 + %val0 = load volatile i8, ptr addrspace(1) undef + %val1 = load volatile i32, ptr addrspace(1) undef + %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1 + store i8 %val0, ptr addrspace(5) %gep0 + store i32 %val1, ptr addrspace(5) %gep1 ret void } @@ -977,16 +976,16 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef - %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <33 x i32>, ptr addrspace(1) %ptr ret <33 x i32> %val } -define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { +define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1004,8 +1003,8 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN - %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx - %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep + %gep = getelementptr inbounds <33 x i32>, ptr addrspace(1) %p, i32 %idx + %val = load <33 x i32>, ptr addrspace(1) %gep ret <33 x i32> %val } @@ -1016,7 +1015,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -1026,8 +1025,8 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = 
load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef - %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr ret { <32 x i32>, i32 }%val } @@ -1038,7 +1037,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -1048,8 +1047,8 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef - %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr ret { i32, <32 x i32> }%val } @@ -1063,10 +1062,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1076,10 +1075,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3 - %load0 = load volatile i32, i32 addrspace(3)* undef - %load1 = load volatile i32, i32 addrspace(3)* undef - %load2 = load volatile i32, i32 addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, ptr addrspace(3) undef + %load1 = load volatile i32, ptr addrspace(3) undef + %load2 = load volatile i32, ptr addrspace(3) undef + %load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0 %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 @@ -1093,16 +1092,15 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1112,10 +1110,10 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %load0 = load volatile float, float addrspace(3)* undef - %load1 = load volatile float, float addrspace(3)* undef - %load2 = load volatile float, float addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile float, ptr addrspace(3) undef + %load1 = load volatile float, ptr addrspace(3) undef + %load2 = load volatile float, ptr addrspace(3) undef + %load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x float> undef, float %load0, i32 0 %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 @@ -1125,7 +1123,7 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ret { <3 x float>, i32 } %insert.4 } -define void @void_func_sret_max_known_zero_bits(i8 
addrspace(5)* sret(i8) %arg0) #0 { +define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -1139,19 +1137,19 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN - %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 + %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 %lshr1 = lshr i32 %arg0.int, 17 %lshr2 = lshr i32 %arg0.int, 18 - store volatile i32 %lshr0, i32 addrspace(3)* undef - store volatile i32 %lshr1, i32 addrspace(3)* undef - store volatile i32 %lshr2, i32 addrspace(3)* undef + store volatile i32 %lshr0, ptr addrspace(3) undef + store volatile i32 %lshr1, ptr addrspace(3) undef + store volatile i32 %lshr2, ptr addrspace(3) undef ret void } @@ -1159,7 +1157,7 @@ define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1195,7 +1193,7 @@ define i1022 @i1022_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit 
$vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } @@ -1203,7 +1201,7 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1239,7 +1237,7 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } @@ -1247,7 +1245,7 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1283,11 +1281,11 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } -%struct.with.ptrs = type { <32 x i32>, i32 addrspace(3)*, i32 addrspace(1)*, <2 x i8 addrspace(1)*> } +%struct.with.ptrs = type { <32 x i32>, ptr addrspace(3), ptr addrspace(1), <2 x ptr addrspace(1)> } define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void @@ -1296,16 +1294,16 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `ptr addrspace(1) undef` + 128, align 128, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `ptr addrspace(1) undef` + 136, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = 
G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `ptr addrspace(1) undef` + 144, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) @@ -1317,7 +1315,7 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) ; CHECK-NEXT: SI_RETURN - %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef + %val = load volatile %struct.with.ptrs, ptr addrspace(1) undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll index 16e5aff2b1103..09df9400197e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll @@ -2,6 +2,6 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -verify-machineinstrs -o - %s define <2 x i65> @v2i65_func_void() #0 { - %val = load <2 x i65>, <2 x i65> addrspace(1)* undef + %val = load <2 x i65>, ptr addrspace(1) undef ret <2 x i65> %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 58db5db52a475..4441bacdb58db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -25,11 +25,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -52,11 +52,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrs ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -77,11 +77,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrsp ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -104,11 +104,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1)* %ptr, float %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -129,11 +129,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrspace(1)* inreg %ptr, float %data) #0 { +define 
amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -156,11 +156,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrs ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index 0e97b51801b97..caa6307976fbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -26,11 +26,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -55,11 +55,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspa ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], 
[[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -82,11 +82,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspac ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -111,11 +111,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float ad ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* %ptr, float %data) #0 { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -138,11 +138,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret 
float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspace(1)* inreg %ptr, float %data) #0 { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -167,11 +167,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspa ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll index 32f33683f0f5c..0c3fa41cb97fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -15,11 +15,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace( ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -38,11 +38,11 @@ define amdgpu_ps 
double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1 ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -56,11 +56,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addr ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -80,11 +80,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrs ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -97,11 +97,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrs ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double 
@llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -120,11 +120,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrsp ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -138,11 +138,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -162,11 +162,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace(1)* %ptr, double %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -179,11 +179,11 @@ define amdgpu_ps void 
@global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace( ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1)* %ptr, double %data) #0 { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -202,11 +202,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1 ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addrspace(1)* inreg %ptr, double %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -220,11 +220,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addr ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrspace(1)* inreg %ptr, double %data) #0 { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -244,11 +244,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrs ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double 
%ret } -declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)*, double) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)*, double) +declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 6fe7eef2b10d1..247ef8254e2df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -24,11 +24,11 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> addr ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -51,11 +51,11 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -76,11 +76,11 @@ define 
amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -103,9 +103,9 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index 3ee4d957a572b..1fa5f575808f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -14,11 +14,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> a ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -31,11 +31,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x h ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -47,11 +47,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x ha ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -64,9 +64,9 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(< ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> 
@llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll index a216a8b509e87..4eddf087bbec2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll @@ -6,10 +6,10 @@ @external_private = external addrspace(5) global i32, align 4 @internal_private = internal addrspace(5) global i32 undef, align 4 -define i32 addrspace(5)* @fn_external_private() { - ret i32 addrspace(5)* @external_private +define ptr addrspace(5) @fn_external_private() { + ret ptr addrspace(5) @external_private } -define i32 addrspace(5)* @fn_internal_private() { - ret i32 addrspace(5)* @internal_private +define ptr addrspace(5) @fn_internal_private() { + ret ptr addrspace(5) @internal_private } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index 0812972204354..b60dd6dea7f79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -13,7 +13,7 @@ @internal_other = internal addrspace(999) global i32 9, align 4 -define i32 addrspace(4)* @external_constant_got() { +define ptr addrspace(4) @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc @@ -22,10 +22,10 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(4)* @external_constant + ret ptr addrspace(4) @external_constant } -define i32 addrspace(1)* @external_global_got() { +define ptr addrspace(1) @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc @@ -34,10 +34,10 @@ define i32 addrspace(1)* @external_global_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(1)* @external_global + ret ptr addrspace(1) @external_global } -define i32 addrspace(999)* @external_other_got() { +define ptr addrspace(999) @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc @@ 
-46,10 +46,10 @@ define i32 addrspace(999)* @external_other_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(999)* @external_other + ret ptr addrspace(999) @external_other } -define i32 addrspace(4)* @internal_constant_pcrel() { +define ptr addrspace(4) @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc @@ -57,10 +57,10 @@ define i32 addrspace(4)* @internal_constant_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(4)* @internal_constant + ret ptr addrspace(4) @internal_constant } -define i32 addrspace(1)* @internal_global_pcrel() { +define ptr addrspace(1) @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc @@ -68,10 +68,10 @@ define i32 addrspace(1)* @internal_global_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(1)* @internal_global + ret ptr addrspace(1) @internal_global } -define i32 addrspace(999)* @internal_other_pcrel() { +define ptr addrspace(999) @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc @@ -79,10 +79,10 @@ define i32 addrspace(999)* @internal_other_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(999)* @internal_other + ret ptr addrspace(999) @internal_other } -define i32 addrspace(6)* @external_constant32_got() { +define ptr addrspace(6) @external_constant32_got() { ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc @@ -90,15 +90,15 @@ define i32 addrspace(6)* @external_constant32_got() { ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 - ret i32 addrspace(6)* @external_constant32 + ret ptr addrspace(6) @external_constant32 } -define i32 addrspace(6)* @internal_constant32_pcrel() { +define ptr addrspace(6) @internal_constant32_pcrel() { ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit 
$vgpr0 - ret i32 addrspace(6)* @internal_constant32 + ret ptr addrspace(6) @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll index d6c675a636e9a..21fa4afb374cd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -12,19 +12,19 @@ ; CHECK-LABEL: {{^}}dynamic_shared_array_0: ; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}} -define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) { +define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* %arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %tid.x + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] -define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { +define amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) { entry: %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %idx.0 = add nsw i32 %tid.x, 64 @@ -32,19 +32,19 @@ entry: br i1 %tmp, label %if, label %else if: ; preds = %entry - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 br label %endif else: ; preds = %entry - %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val1 = load float, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val1 = load float, ptr addrspace(3) %arrayidx1, align 4 br label %endif endif: ; preds = %else, %if %val = phi float [ %val0, %if ], [ %val1, %else ] - %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val, float addrspace(3)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val, ptr addrspace(3) %arrayidx, align 4 ret void } @@ -54,10 +54,10 @@ endif: ; preds = %else, %if define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* 
%arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [4096 x float], ptr addrspace(3) @lds2, i32 0, i32 %vidx + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -69,11 +69,11 @@ define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -85,14 +85,14 @@ define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared1, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -103,14 +103,14 @@ define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) 
@dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared2, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -121,14 +121,14 @@ define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared3, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 7828a632f9e46..a5c9b896eb0d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -7,7 +7,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s -define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) { +define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V3-LABEL: addrspacecast: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -159,14 +159,14 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX9V5-NEXT: flat_store_dword v[0:1], v2 ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32* - %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32* - store volatile i32 1, i32* %flat.private - store volatile i32 2, i32* %flat.local + %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr + %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr + store volatile i32 1, ptr %flat.private + store volatile i32 2, ptr %flat.local ret void } -define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_is_shared: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -244,13 +244,13 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) { ; GFX9V5-NEXT: global_store_dword 
v[0:1], v0, off ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr) %zext = zext i1 %is.shared to i32 - store volatile i32 %zext, i32 addrspace(1)* undef + store volatile i32 %zext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_is_private: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -328,9 +328,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) { ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr) %zext = zext i1 %is.private to i32 - store volatile i32 %zext, i32 addrspace(1)* undef + store volatile i32 %zext, ptr addrspace(1) undef ret void } @@ -395,7 +395,7 @@ define amdgpu_kernel void @llvm_debugtrap() { unreachable } -define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 @@ -521,22 +521,22 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() - %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() - %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr - %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr - %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr - store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr + %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr + %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr + %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr + store volatile i64 %dispatch.id, ptr addrspace(1) %ptr ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() -declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() +declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() declare i64 @llvm.amdgcn.dispatch.id() -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -declare i1 @llvm.amdgcn.is.shared(i8*) -declare i1 @llvm.amdgcn.is.private(i8*) +declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +declare i1 @llvm.amdgcn.is.shared(ptr) +declare i1 @llvm.amdgcn.is.private(ptr) declare void @llvm.trap() declare void @llvm.debugtrap() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll index 15b92df2f1601..9a5dc978e8331 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll @@ -4,12 +4,12 @@ ; ERR: remark: :0:0: 
unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector) ; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar) -; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer) -; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer) +; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer) +; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer) ; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big) ; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small) -; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg)' (in function: use_pointer_too_small) -; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg)' (in function: use_pointer_too_big) +; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg)' (in function: use_pointer_too_small) +; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg)' (in function: use_pointer_too_big) ; This asm is broken because it's using a 5 element wide physical @@ -59,7 +59,7 @@ define i32 @return_type_is_too_small_scalar() { ret i32 %reg } -define i8 addrspace(1)* @return_type_is_too_big_pointer() { +define ptr addrspace(1) @return_type_is_too_big_pointer() { ; CHECK-LABEL: name: return_type_is_too_big_pointer ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -67,11 +67,11 @@ define i8 addrspace(1)* @return_type_is_too_big_pointer() { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8 - %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}" () - ret i8 addrspace(1)* %reg + %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}" () + ret ptr addrspace(1) %reg } -define i8 addrspace(3)* @return_type_is_too_small_pointer() { +define ptr addrspace(3) @return_type_is_too_small_pointer() { ; CHECK-LABEL: name: return_type_is_too_small_pointer ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -79,8 +79,8 @@ define i8 addrspace(3)* @return_type_is_too_small_pointer() { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9 - %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}" () - ret i8 addrspace(3)* %reg + %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}" () + ret ptr addrspace(3) %reg } define void 
@use_vector_too_small(<8 x i32> %arg) { @@ -154,7 +154,7 @@ define void @use_scalar_too_big(i32 %arg) { ret void } -define void @use_pointer_too_small(i8 addrspace(1)* %arg) { +define void @use_pointer_too_small(ptr addrspace(1) %arg) { ; CHECK-LABEL: name: use_pointer_too_small ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -165,11 +165,11 @@ define void @use_pointer_too_small(i8 addrspace(1)* %arg) { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): - call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg) + call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg) ret void } -define void @use_pointer_too_big(i32 addrspace(3)* %arg) { +define void @use_pointer_too_big(ptr addrspace(3) %arg) { ; CHECK-LABEL: name: use_pointer_too_big ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -178,6 +178,6 @@ define void @use_pointer_too_big(i32 addrspace(3)* %arg) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): - call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg) + call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index ecff0fb2b1ceb..8ec842021e176 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -4,7 +4,7 @@ ; Check lowering of some large insertelement that use the stack ; instead of register indexing. -define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.ptr, <64 x i32> addrspace(1)* %ptr, i32 %val, i32 %idx) #0 { +define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr addrspace(1) %ptr, i32 %val, i32 %idx) #0 { ; GCN-LABEL: v_insert_v64i32_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[20:23], s[4:5], 0x0 @@ -265,9 +265,9 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out. 
; GCN-NEXT: global_store_dwordx4 v0, v[56:59], s[20:21] offset:224 ; GCN-NEXT: global_store_dwordx4 v0, v[60:63], s[20:21] offset:240 ; GCN-NEXT: s_endpgm - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx - store <64 x i32> %insert, <64 x i32> addrspace(1)* %out.ptr + store <64 x i32> %insert, ptr addrspace(1) %out.ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index 922fdc9dd4fdb..5f2e1e9d509c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_ps void @insertelement_s_v2i16_s_s(<2 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -91,13 +91,13 @@ define amdgpu_ps void @insertelement_s_v2i16_s_s(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_s_s(<2 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v2, v[0:1], off @@ -181,13 +181,13 @@ define amdgpu_ps void @insertelement_v_v2i16_s_s(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1 )* %ptr + %vec = load <2 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_v_s(<2 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -267,13 +267,13 @@ define amdgpu_ps void @insertelement_s_v2i16_v_s(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_s_v(<2 x 
i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -360,13 +360,13 @@ define amdgpu_ps void @insertelement_s_v2i16_s_v(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_v_v(<2 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -450,13 +450,13 @@ define amdgpu_ps void @insertelement_s_v2i16_v_v(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_s_v(<2 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -543,13 +543,13 @@ define amdgpu_ps void @insertelement_v_v2i16_s_v(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_v_s(<2 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -632,13 +632,13 @@ define amdgpu_ps void @insertelement_v_v2i16_v_s(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_v_v(<2 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v4, v[0:1], off @@ -722,70 +722,70 @@ define amdgpu_ps void 
@insertelement_v_v2i16_v_v(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } ; FIXME: 3 element load/store legalization -; define amdgpu_ps void @insertelement_s_v3i16_s_s(<3 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_s_s(<3 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1 )* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(1 ) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_v_s(<3 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_s_v(<3 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_v_v(<3 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_s_v(<3 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } 
-; define amdgpu_ps void @insertelement_v_v3i16_v_s(<3 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_v_v(<3 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -define amdgpu_ps void @insertelement_v_v4i16_s_s(<4 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -900,13 +900,13 @@ define amdgpu_ps void @insertelement_v_v4i16_s_s(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1 )* %ptr + %vec = load <4 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_v_s(<4 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v4i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v4i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1031,13 +1031,13 @@ define amdgpu_ps void @insertelement_s_v4i16_v_s(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_s_v(<4 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1171,13 +1171,13 @@ define amdgpu_ps void @insertelement_s_v4i16_s_v(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_v_v(<4 x i16> 
addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1309,13 +1309,13 @@ define amdgpu_ps void @insertelement_s_v4i16_v_v(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_s_v(<4 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1433,13 +1433,13 @@ define amdgpu_ps void @insertelement_v_v4i16_s_v(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_v_s(<4 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1552,13 +1552,13 @@ define amdgpu_ps void @insertelement_v_v4i16_v_s(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_v_v(<4 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1674,13 +1674,13 @@ define amdgpu_ps void @insertelement_v_v4i16_v_v(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_s_s(<8 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], 
s[2:3], 0x0 @@ -1857,13 +1857,13 @@ define amdgpu_ps void @insertelement_s_v8i16_s_s(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_s_s(<8 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -2012,13 +2012,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1 )* %ptr + %vec = load <8 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_v_s(<8 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -2194,13 +2194,13 @@ define amdgpu_ps void @insertelement_s_v8i16_v_s(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_s_v(<8 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 @@ -2381,13 +2381,13 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_v_v(<8 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 @@ -2568,13 +2568,13 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_s_v(<8 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -2724,13 +2724,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_v_s(<8 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -2876,13 +2876,13 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_v_v(<8 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -3029,13 +3029,13 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 @@ -3212,13 +3212,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> 
addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_s_s(<16 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off @@ -3366,13 +3366,13 @@ define amdgpu_ps void @insertelement_v_v16i16_s_s(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[10:11], v[6:9], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1 )* %ptr + %vec = load <16 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 @@ -3547,13 +3547,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[16:23], s[2:3], 0x0 @@ -3841,13 +3841,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[12:19], s[2:3], 0x0 @@ -4135,13 +4135,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_s_v(<16 x i16> addrspace(1)* %ptr, i16 
inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -4381,13 +4381,13 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[13:14], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_v_s(<16 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -4532,13 +4532,13 @@ define amdgpu_ps void @insertelement_v_v16i16_v_s(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[11:12], v[7:10], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_v_v(<16 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -4775,8 +4775,8 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[14:15], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll index d0885fa79e55d..bc06c38c984fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_ps void @insertelement_s_v2i8_s_s(<2 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -107,13 +107,13 @@ define amdgpu_ps void @insertelement_s_v2i8_s_s(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, 
<2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_s_s(<2 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -209,13 +209,13 @@ define amdgpu_ps void @insertelement_v_v2i8_s_s(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1 )* %ptr + %vec = load <2 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_v_s(<2 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -313,13 +313,13 @@ define amdgpu_ps void @insertelement_s_v2i8_v_s(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_s_v(<2 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -421,13 +421,13 @@ define amdgpu_ps void @insertelement_s_v2i8_s_v(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_v_v(<2 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -525,13 +525,13 @@ define amdgpu_ps void @insertelement_s_v2i8_v_v(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_s_v(<2 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define 
amdgpu_ps void @insertelement_v_v2i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -627,13 +627,13 @@ define amdgpu_ps void @insertelement_v_v2i8_s_v(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_v_s(<2 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -726,13 +726,13 @@ define amdgpu_ps void @insertelement_v_v2i8_v_s(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_v_v(<2 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -825,70 +825,70 @@ define amdgpu_ps void @insertelement_v_v2i8_v_v(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } ; FIXME: 3 element load/store legalization -; define amdgpu_ps void @insertelement_s_v3i8_s_s(<3 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_s_s(<3 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1 )* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(1 ) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_v_s(<3 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_v_s(ptr 
addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_s_v(<3 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_v_v(<3 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_s_v(<3 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_v_s(<3 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_v_v(<3 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v2, v[0:1], off @@ -974,13 +974,13 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1 )* %ptr + %vec = load <4 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void 
@insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v4i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1061,13 +1061,13 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1155,13 +1155,13 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1246,13 +1246,13 @@ define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -1341,13 +1341,13 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -1432,13 +1432,13 @@ define amdgpu_ps void 
@insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v4, v[0:1], off @@ -1524,13 +1524,13 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1658,13 +1658,13 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1781,13 +1781,13 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1 )* %ptr + %vec = load <8 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1913,13 +1913,13 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = 
insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -2054,13 +2054,13 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -2193,13 +2193,13 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2319,13 +2319,13 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2440,13 +2440,13 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void 
@insertelement_v_v8i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2564,13 +2564,13 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -2747,13 +2747,13 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -2902,13 +2902,13 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1 )* %ptr + %vec = load <16 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -3084,13 +3084,13 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 @@ -3271,13 +3271,13 @@ define 
amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 @@ -3458,13 +3458,13 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -3614,13 +3614,13 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -3766,13 +3766,13 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -3919,8 +3919,8 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x 
i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index c81ffc4790106..714bb5687ab88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, <64 x i32> addrspace(1)* %ptr.out) #0 { +define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) #0 { ; GCN-LABEL: v_insert_v64i32_37: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -152,11 +152,11 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.in = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.in, i32 %id - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %gep.in + %gep.in = getelementptr <64 x i32>, ptr addrspace(1) %ptr.in, i32 %id + %vec = load <64 x i32>, ptr addrspace(1) %gep.in %insert = insertelement <64 x i32> %vec, i32 999, i32 37 - %gep.out = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.out, i32 %id - store <64 x i32> %insert, <64 x i32> addrspace(1)* %gep.out + %gep.out = getelementptr <64 x i32>, ptr addrspace(1) %ptr.out, i32 %id + store <64 x i32> %insert, ptr addrspace(1) %gep.out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index f4b821ca602c7..b668c42e99fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -46,7 +46,7 @@ entry: ret <8 x i32> %insert } -define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x ptr addrspace(3)> @dyn_insertelement_v8p3i8_s_s_s(<8 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 @@ -81,8 +81,8 @@ define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s10 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - ret <8 x i8 addrspace(3)*> %insert + %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + ret <8 x ptr addrspace(3)> %insert } define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) { @@ -624,7 +624,7 @@ entry: ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 inreg %idx) { ; GPRIDX-LABEL: 
dyn_insertelement_v8p3i8_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 @@ -651,8 +651,8 @@ define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32> + %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + %cast.0 = ptrtoint <8 x ptr addrspace(3)> %insert to <8 x i32> %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float> ret <8 x float> %cast.1 } @@ -752,7 +752,7 @@ entry: ret <8 x i64> %insert } -define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x ptr addrspace(1)> @dyn_insertelement_v8p1i8_s_s_s(<8 x ptr addrspace(1)> inreg %vec, ptr addrspace(1) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -798,8 +798,8 @@ define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx - ret <8 x i8 addrspace(1)*> %insert + %insert = insertelement <8 x ptr addrspace(1)> %vec, ptr addrspace(1) %val, i32 %idx + ret <8 x ptr addrspace(1)> %insert } define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { @@ -996,10 +996,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1208,10 +1208,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1357,10 +1357,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x 
double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1418,10 +1418,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1624,10 +1624,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1748,10 +1748,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1809,10 +1809,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr 
addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1930,10 +1930,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -2538,10 +2538,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -2664,10 +2664,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -4316,7 +4316,7 @@ entry: ret <7 x i32> %insert } -define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <7 x ptr addrspace(3)> @dyn_insertelement_v7p3i8_s_s_s(<7 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 @@ -4353,8 +4353,8 @@ define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 ; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <7 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - ret <7 x i8 addrspace(3)*> %insert + %insert = insertelement <7 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + ret <7 x ptr addrspace(3)> %insert } define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float 
%val, i32 inreg %idx) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll index b858aeb433925..25f4478702e82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll @@ -21,7 +21,7 @@ define amdgpu_kernel void @default_kernel() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_x() { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -35,7 +35,7 @@ define amdgpu_kernel void @workgroup_id_x() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_y() { %id = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -49,7 +49,7 @@ define amdgpu_kernel void @workgroup_id_y() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_z() { %id = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -63,9 +63,9 @@ define amdgpu_kernel void @workgroup_id_z() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xy() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } @@ -79,11 +79,11 @@ define amdgpu_kernel void @workgroup_id_xy() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xyz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef %id2 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id2, i32 addrspace(1)* undef + store volatile i32 %id2, ptr addrspace(1) undef ret void } @@ -97,9 +97,9 @@ define amdgpu_kernel void @workgroup_id_xyz() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_yz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } @@ -113,9 +113,9 @@ define amdgpu_kernel void @workgroup_id_yz() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 41ac2b984ef10..3ce609676b8bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -2,7 +2,7 @@ ; RUN: llc 
-global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s -define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind { ; HSA-VI-LABEL: name: i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -32,11 +32,11 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroext %in) nounwind { ; HSA-VI-LABEL: name: i8_zext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -66,11 +66,11 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { +define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signext %in) nounwind { ; HSA-VI-LABEL: name: i8_sext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -100,11 +100,11 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nounwind { ; HSA-VI-LABEL: name: i16_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -134,11 +134,11 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zeroext %in) nounwind { ; HSA-VI-LABEL: name: i16_zext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -168,11 +168,11 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; 
LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { +define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 signext %in) nounwind { ; HSA-VI-LABEL: name: i16_sext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -202,11 +202,11 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { +define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nounwind { ; HSA-VI-LABEL: name: i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -234,11 +234,11 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store i32 %in, i32 addrspace(1)* %out, align 4 + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { +define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) nounwind { ; HSA-VI-LABEL: name: f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -266,11 +266,11 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store float %in, float addrspace(1)* %out, align 4 + store float %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { +define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { ; HSA-VI-LABEL: name: v2i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -298,11 +298,11 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i8> %in, <2 x i8> addrspace(1)* %out + store <2 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { +define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { ; HSA-VI-LABEL: name: v2i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -330,11 +330,11 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i16> %in, <2 x i16> addrspace(1)* %out + store <2 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { +define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> %in) nounwind { ; 
HSA-VI-LABEL: name: v2i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -362,11 +362,11 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { +define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float> %in) nounwind { ; HSA-VI-LABEL: name: v2f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -394,11 +394,11 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + store <2 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { ; HSA-VI-LABEL: name: v3i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -426,11 +426,11 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + store <3 x i8> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { +define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { ; HSA-VI-LABEL: name: v3i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -458,11 +458,11 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + store <3 x i16> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { +define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v3i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -490,11 +490,11 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + store <3 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { +define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { ; HSA-VI-LABEL: name: v3f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -522,11 +522,11 @@ define 
amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + store <3 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { +define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { ; HSA-VI-LABEL: name: v4i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -554,11 +554,11 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i8> %in, <4 x i8> addrspace(1)* %out + store <4 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { +define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { ; HSA-VI-LABEL: name: v4i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -586,11 +586,11 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i16> %in, <4 x i16> addrspace(1)* %out + store <4 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { +define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v4i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -618,11 +618,11 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { +define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float> %in) nounwind { ; HSA-VI-LABEL: name: v4f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -650,11 +650,11 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 + store <4 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { +define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { ; HSA-VI-LABEL: name: v8i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -682,11 +682,11 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i8> %in, <8 x i8> addrspace(1)* %out + store <8 x 
i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { +define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) { ; HSA-VI-LABEL: name: v8i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -714,11 +714,11 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i16> %in, <8 x i16> addrspace(1)* %out + store <8 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { +define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v8i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -746,11 +746,11 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + store <8 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { +define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float> %in) nounwind { ; HSA-VI-LABEL: name: v8f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -778,11 +778,11 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + store <8 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { +define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { ; HSA-VI-LABEL: name: v16i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -810,11 +810,11 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i8> %in, <16 x i8> addrspace(1)* %out + store <16 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { +define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { ; HSA-VI-LABEL: name: v16i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -842,11 +842,11 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i16> %in, <16 x i16> addrspace(1)* %out + store <16 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { +define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v16i32_arg ; HSA-VI: 
bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -874,11 +874,11 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + store <16 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { +define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x float> %in) nounwind { ; HSA-VI-LABEL: name: v16f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -906,11 +906,11 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 + store <16 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind { ; HSA-VI-LABEL: name: kernel_arg_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -937,11 +937,11 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i64 %a, i64 addrspace(1)* %out, align 8 + store i64 %a, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { +define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { ; HSA-VI-LABEL: name: f64_kernel_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -969,11 +969,11 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store double %in, double addrspace(1)* %out + store double %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1000,11 +1000,11 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i1 %x, i1 addrspace(1)* %out, align 1 + store i1 %x, ptr addrspace(1) %out, align 1 ret void } -define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_zext_i32 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1034,11 +1034,11 @@ define 
amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_zext_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1068,11 +1068,11 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_sext_i32 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1102,11 +1102,11 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_sext_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1136,7 +1136,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } @@ -1152,7 +1152,7 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1163,9 +1163,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) 
into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -1179,7 +1179,7 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1190,9 +1190,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -1226,13 +1226,11 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1255,27 +1253,25 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg1, 0 %val3 = extractvalue {i32, i64} %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } -define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { +define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) { ; HSA-VI-LABEL: name: pointer_in_struct_argument ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1297,14 +1293,11 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: 
(volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1327,24 +1320,21 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 - %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 - %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 - %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1 - store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1)* 
%val1, i8 addrspace(1)* addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null + %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 + %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1 + %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0 + %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1 + store volatile ptr addrspace(3) %val0, ptr addrspace(1) null + store volatile ptr addrspace(1) %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile ptr addrspace(3) %val2, ptr addrspace(1) null + store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null ret void } @@ -1369,11 +1359,10 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.1): @@ -1393,24 +1382,23 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) 
into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 %val2 = extractvalue <{i32, i64}> %arg1, 0 %val3 = extractvalue <{i32, i64}> %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } -define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind { +define amdgpu_kernel void @unused_i32_arg(ptr addrspace(1) nocapture %out, i32 %unused, i32 %in) nounwind { ; HSA-VI-LABEL: name: unused_i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1428,7 +1416,7 @@ entry: } ; Byref pointers should only be treated as offsets from kernarg -define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) { +define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1457,13 +1445,13 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i8, i8 addrspace(4)* %in.byref + %in = load i8, ptr addrspace(4) %in.byref %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) { +define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) align 2 %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i16_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1492,13 +1480,13 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i16, i16 addrspace(4)* %in.byref + %in = load i16, ptr addrspace(4) %in.byref %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align 4 %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: 
$sgpr4_sgpr5 @@ -1533,13 +1521,13 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_constant_v4i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1555,7 +1543,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1572,16 +1560,15 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref - store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 - %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)* - store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4 + %in = load <4 x i32>, ptr addrspace(4) %in.byref + store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) 
align(256) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_align_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1616,13 +1603,13 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1637,7 +1624,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg @@ -1654,18 +1641,17 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref - %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* - store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load <16 x i32>, ptr addrspace(4) %in.byref + store volatile <16 x i32> %in, ptr 
addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } ; Also accept byref kernel arguments with other global address spaces. -define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_global_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1694,12 +1680,12 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(1)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(1) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_flat_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1728,12 +1714,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1762,12 +1748,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(6)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(6) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1796,13 +1782,13 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; LEGACY-MESA-VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(999)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(999) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } ; Invalid, but should not crash. -define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_local_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1831,12 +1817,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(3)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(3) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) { +define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(4) %in0.byref, ptr addrspace(4) byref(i32) align(4) %in1.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1879,15 +1865,15 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in0 = load i32, i32 addrspace(4)* %in0.byref - %in1 = load i32, i32 addrspace(4)* %in1.byref - store volatile i32 %in0, i32 addrspace(1)* %out, align 4 - store volatile i32 %in1, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in0 = load i32, ptr addrspace(4) %in0.byref + %in1 = load i32, ptr addrspace(4) %in1.byref + store volatile i32 %in0, ptr addrspace(1) %out, align 4 + store volatile i32 %in1, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1897,7 +1883,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1908,14 +1894,14 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store i32 %in, i32 addrspace(1)* undef, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store i32 %in, ptr addrspace(1) undef, align 4 ret void } -define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { +define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind { ; HSA-VI-LABEL: name: p3i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1938,11 +1924,11 @@ define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i8 9, i8 addrspace(3)* %arg, align 4 + store i8 9, ptr addrspace(3) %arg, align 4 ret void } -define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { +define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind { ; HSA-VI-LABEL: name: p1i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1950,7 +1936,7 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) + ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: p1i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1959,13 +1945,13 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) + ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i8 9, i8 addrspace(3)* null + store i8 9, ptr addrspace(3) null ret void } -define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { +define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind { ; HSA-VI-LABEL: name: 
v2p1i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1975,7 +1961,7 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1986,13 +1972,13 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef + store <2 x ptr addrspace(1)> %arg, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { +define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind { ; HSA-VI-LABEL: name: v2p3i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -2002,7 +1988,7 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -2013,13 +1999,13 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef + store <2 x ptr addrspace(3)> %arg, ptr addrspace(1) undef ret void } -define amdgpu_kernel void 
@v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind { +define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) nounwind { ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -2032,10 +2018,10 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -2049,11 +2035,11 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef + store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 7445bb3b2a3b2..5169c9b634d3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -27,7 +27,7 @@ define amdgpu_vs void @test_f32(float %arg0) { ret void } -define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { +define amdgpu_vs void @test_ptr2_inreg(ptr 
addrspace(4) inreg %arg0) { ; CHECK-LABEL: name: test_ptr2_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 @@ -37,11 +37,11 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 - %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 + %tmp0 = load volatile i32, ptr addrspace(4) %arg0 ret void } -define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) { +define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4) inreg %arg1) { ; CHECK-LABEL: name: test_sgpr_alignment0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4 @@ -54,7 +54,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg1, addrspace 4) ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 - %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 + %tmp0 = load volatile i32, ptr addrspace(4) %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index aa19fc8b26fca..55318eae4d72f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s ; TODO: Could potentially insert it here -define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) { +define void @arg_align_8(ptr addrspace(1) align 8 %arg0) { ; CHECK-LABEL: name: arg_align_8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -13,12 +13,12 @@ define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 0, i8 addrspace(1)* %arg0, align 8 + store i8 0, ptr addrspace(1) %arg0, align 8 ret void } -declare i8 addrspace(1)* @returns_ptr() -declare align 8 i8 addrspace(1)* @returns_ptr_align8() +declare ptr addrspace(1) @returns_ptr() +declare align 8 ptr addrspace(1) @returns_ptr_align8() define void @call_result_align_1() { ; CHECK-LABEL: name: call_result_align_1 @@ -64,8 +64,8 @@ define void @call_result_align_1() { ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call align 1 i8 addrspace(1)* @returns_ptr() - store i8 0, i8 addrspace(1)* %ptr, align 1 + %ptr = call align 1 ptr addrspace(1) @returns_ptr() + store i8 0, ptr addrspace(1) %ptr, align 1 ret void } @@ -114,8 +114,8 @@ define void @call_result_align_8() { ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call 
align 8 i8 addrspace(1)* @returns_ptr() - store i8 0, i8 addrspace(1)* %ptr, align 8 + %ptr = call align 8 ptr addrspace(1) @returns_ptr() + store i8 0, ptr addrspace(1) %ptr, align 8 ret void } @@ -164,12 +164,12 @@ define void @declaration_result_align_8() { ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call i8 addrspace(1)* @returns_ptr_align8() - store i8 0, i8 addrspace(1)* %ptr, align 8 + %ptr = call ptr addrspace(1) @returns_ptr_align8() + store i8 0, ptr addrspace(1) %ptr, align 8 ret void } -define i8 addrspace(1)* @tail_call_assert_align() { +define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-LABEL: name: tail_call_assert_align ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -206,6 +206,6 @@ define i8 addrspace(1)* @tail_call_assert_align() { ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: - %call = tail call i8 addrspace(1)* @returns_ptr_align8() - ret i8 addrspace(1)* %call + %call = tail call ptr addrspace(1) @returns_ptr_align8() + ret ptr addrspace(1) %call } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 5206284cf5723..e5c11381865f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -o - %s | FileCheck %s -define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { +define float @test_atomicrmw_fadd(ptr addrspace(3) %addr) { ; CHECK-LABEL: name: test_atomicrmw_fadd ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -11,11 +11,11 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst + %oldval = atomicrmw fadd ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval } -define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { +define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) { ; CHECK-LABEL: name: test_atomicrmw_fsub ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.2(0x80000000) @@ -33,7 +33,7 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 
3) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 @@ -45,6 +45,6 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst + %oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 4103b8055e388..a9516450834a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -64,7 +64,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -80,8 +80,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -90,7 +90,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -106,8 +106,8 @@ define amdgpu_gfx void 
@test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 4ded7eba7ab15..572b2e5a8b245 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -26,11 +26,11 @@ declare half @external_f16_func_void() #0 declare float @external_f32_func_void() #0 declare double @external_f64_func_void() #0 -declare i8 addrspace(1)* @external_p1_func_void() #0 -declare <2 x i8 addrspace(1)*> @external_v2p1_func_void() #0 +declare ptr addrspace(1) @external_p1_func_void() #0 +declare <2 x ptr addrspace(1)> @external_v2p1_func_void() #0 -declare i8 addrspace(3)* @external_p3_func_void() #0 -declare <2 x i8 addrspace(3)*> @external_v2p3_func_void() #0 +declare ptr addrspace(3) @external_p3_func_void() #0 +declare <2 x ptr addrspace(3)> @external_v2p3_func_void() #0 declare <2 x half> @external_v2f16_func_void() #0 declare <3 x half> @external_v3f16_func_void() #0 @@ -47,7 +47,7 @@ declare <8 x i32> @external_v8i32_func_void() #0 declare <16 x i32> @external_v16i32_func_void() #0 declare <32 x i32> @external_v32i32_func_void() #0 declare <33 x i32> @external_v33i32_func_void() #0 -declare <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)*, i32) #0 +declare <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1), i32) #0 declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0 declare { i32, <32 x i32> } @external_i32_v32i32_func_void() #0 @@ -67,7 +67,7 @@ declare { i32, i64 } @external_gfx_i32_i64_func_void() #0 declare hidden i32 @external_gfx_i32_func_i32(i32) #0 -define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GCN-LABEL: name: test_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY 
[[COPY8]] @@ -124,11 +124,11 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) - store volatile i32 %val, i32 addrspace(1)* %out + store volatile i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { +define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 @@ -148,7 +148,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) - store volatile i32 %val, i32 addrspace(1)* %out + store volatile i32 %val, ptr addrspace(1) %out ret void } @@ -204,10 +204,10 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_func_void() - store volatile i1 %val, i1 addrspace(1)* undef + store volatile i1 %val, ptr addrspace(1) undef ret void } @@ -223,10 +223,10 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() - store volatile i1 %val, i1 addrspace(1)* undef + store volatile i1 %val, ptr addrspace(1) undef ret void } @@ -284,11 +284,11 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_zeroext_func_void() %val.ext = zext i1 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -346,11 +346,11 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_signext_func_void() %val.ext = sext i1 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -407,10 +407,10 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_func_void() - store volatile i8 %val, i8 addrspace(1)* undef + store volatile i8 %val, ptr addrspace(1) undef ret void } @@ -427,10 +427,10 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() - store volatile i8 %val, i8 addrspace(1)* undef + store volatile i8 %val, ptr addrspace(1) undef ret void } @@ -488,11 +488,11 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_zeroext_func_void() %val.ext = zext i8 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -550,11 +550,11 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_signext_func_void() %val.ext = sext i8 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -610,10 +610,10 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, 
addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_func_void() - store volatile i16 %val, i16 addrspace(1)* undef + store volatile i16 %val, ptr addrspace(1) undef ret void } @@ -671,11 +671,11 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_zeroext_func_void() %val.ext = zext i16 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -733,11 +733,11 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_signext_func_void() %val.ext = sext i16 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -792,10 +792,10 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() - store volatile i32 %val, i32 addrspace(1)* undef + store volatile i32 %val, ptr addrspace(1) undef ret void } @@ -810,10 +810,10 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() - store volatile i32 %val, i32 addrspace(1)* undef + store volatile i32 %val, ptr addrspace(1) undef ret void } @@ -871,10 +871,10 @@ define amdgpu_kernel void 
@test_call_external_i48_func_void() #0 { ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_func_void() - store volatile i48 %val, i48 addrspace(1)* undef + store volatile i48 %val, ptr addrspace(1) undef ret void } @@ -933,11 +933,11 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_zeroext_func_void() %ext = zext i48 %val to i64 - store volatile i64 %ext, i64 addrspace(1)* undef + store volatile i64 %ext, ptr addrspace(1) undef ret void } @@ -996,11 +996,11 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_signext_func_void() %ext = sext i48 %val to i64 - store volatile i64 %ext, i64 addrspace(1)* undef + store volatile i64 %ext, ptr addrspace(1) undef ret void } @@ -1057,10 +1057,10 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i64 @external_i64_func_void() - store volatile i64 %val, i64 addrspace(1)* undef + store volatile i64 %val, ptr addrspace(1) undef ret void } @@ -1117,10 +1117,10 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call i8 addrspace(1)* @external_p1_func_void() - store volatile i8 addrspace(1)* %val, i8 addrspace(1)* addrspace(1)* undef + %val = call ptr addrspace(1) @external_p1_func_void() + store volatile ptr 
addrspace(1) %val, ptr addrspace(1) undef ret void } @@ -1181,10 +1181,10 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call <2 x i8 addrspace(1)*> @external_v2p1_func_void() - store volatile <2 x i8 addrspace(1)*> %val, <2 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <2 x ptr addrspace(1)> @external_v2p1_func_void() + store volatile <2 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -1239,10 +1239,10 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 - %val = call i8 addrspace(3)* @external_p3_func_void() - store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef + %val = call ptr addrspace(3) @external_p3_func_void() + store volatile ptr addrspace(3) %val, ptr addrspace(3) undef ret void } @@ -1299,10 +1299,10 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 - %val = call <2 x i8 addrspace(3)*> @external_v2p3_func_void() - store volatile <2 x i8 addrspace(3)*> %val, <2 x i8 addrspace(3)*> addrspace(3)* undef + %val = call <2 x ptr addrspace(3)> @external_v2p3_func_void() + store volatile <2 x ptr addrspace(3)> %val, ptr addrspace(3) undef ret void } @@ -1358,10 +1358,10 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call half @external_f16_func_void() - store volatile half %val, half 
addrspace(1)* undef + store volatile half %val, ptr addrspace(1) undef ret void } @@ -1416,10 +1416,10 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() - store volatile float %val, float addrspace(1)* undef + store volatile float %val, ptr addrspace(1) undef ret void } @@ -1476,10 +1476,10 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call double @external_f64_func_void() - store volatile double %val, double addrspace(1)* undef + store volatile double %val, ptr addrspace(1) undef ret void } @@ -1540,10 +1540,10 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x double> @external_v2f64_func_void() - store volatile <2 x double> %val, <2 x double> addrspace(1)* undef + store volatile <2 x double> %val, ptr addrspace(1) undef ret void } @@ -1600,10 +1600,10 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i32> @external_v2i32_func_void() - store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef + store volatile <2 x i32> %val, ptr addrspace(1) undef ret void } @@ -1661,10 +1661,10 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i32> @external_v3i32_func_void() - store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8 + store volatile <3 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1723,10 +1723,10 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i32> @external_v4i32_func_void() - store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8 + store volatile <4 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1786,10 +1786,10 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x i32> @external_v5i32_func_void() - store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8 + store volatile <5 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1852,10 +1852,10 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <8 x i32> @external_v8i32_func_void() - store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8 + store volatile <8 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1926,10 +1926,10 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), 
[[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <16 x i32> @external_v16i32_func_void() - store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8 + store volatile <16 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -2016,10 +2016,10 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <32 x i32> @external_v32i32_func_void() - store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8 + store volatile <32 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -2074,10 +2074,10 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() - store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef + store volatile <2 x i16> %val, ptr addrspace(1) undef ret void } @@ -2136,10 +2136,10 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, 
implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() - store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef + store volatile <3 x i16> %val, ptr addrspace(1) undef ret void } @@ -2196,10 +2196,10 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i16> @external_v4i16_func_void() - store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef + store volatile <4 x i16> %val, ptr addrspace(1) undef ret void } @@ -2254,10 +2254,10 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() - store volatile <2 x half> %val, <2 x half> addrspace(1)* undef + store volatile <2 x half> %val, ptr addrspace(1) undef ret void } @@ -2316,10 +2316,10 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() - store volatile <3 x half> %val, <3 x half> addrspace(1)* undef + store volatile <3 x half> %val, ptr addrspace(1) undef ret void } @@ -2376,10 +2376,10 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN-NEXT: 
ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x half> @external_v4f16_func_void() - store volatile <4 x half> %val, <4 x half> addrspace(1)* undef + store volatile <4 x half> %val, ptr addrspace(1) undef ret void } @@ -2437,10 +2437,10 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x float> @external_v3f32_func_void() - store volatile <3 x float> %val, <3 x float> addrspace(1)* undef + store volatile <3 x float> %val, ptr addrspace(1) undef ret void } @@ -2500,10 +2500,10 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x float> @external_v5f32_func_void() - store volatile <5 x float> %val, <5 x float> addrspace(1)* undef + store volatile <5 x float> %val, ptr addrspace(1) undef ret void } @@ -2524,53 +2524,52 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = 
COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } 
@external_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 - store volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i64 %val.1, i64 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i64 %val.1, ptr addrspace(1) undef ret void } @@ -2578,25 +2577,24 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 - store volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i64 %val.1, i64 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i64 %val.1, ptr addrspace(1) undef ret void } @@ -2652,14 +2650,14 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 %val.1 = extractvalue [2 x i32] %val, 1 - store 
volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i32 %val.1, i32 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i32 %val.1, ptr addrspace(1) undef ret void } @@ -2728,11 +2726,11 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [5 x i8] @external_a5i8_func_void() %val.0 = extractvalue [5 x i8] %val, 0 @@ -2740,11 +2738,11 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { %val.2 = extractvalue [5 x i8] %val, 2 %val.3 = extractvalue [5 x i8] %val, 3 %val.4 = extractvalue [5 x i8] %val, 4 - store volatile i8 %val.0, i8 addrspace(1)* undef - store volatile i8 %val.1, i8 addrspace(1)* undef - store volatile i8 %val.2, i8 addrspace(1)* undef - store volatile i8 %val.3, i8 addrspace(1)* undef - store volatile i8 %val.4, i8 addrspace(1)* undef + store volatile i8 %val.0, ptr addrspace(1) undef + store volatile i8 %val.1, ptr addrspace(1) undef + store volatile i8 %val.2, ptr addrspace(1) undef + store volatile i8 %val.3, ptr addrspace(1) undef + store volatile i8 %val.4, ptr addrspace(1) undef ret void } @@ -2764,39 +2762,38 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - 
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 @@ -2805,14 +2802,14 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) 
undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void() %val0 = extractvalue { <32 x i32>, i32 } %val, 0 %val1 = extractvalue { <32 x i32>, i32 } %val, 1 - store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef, align 8 - store volatile i32 %val1, i32 addrspace(1)* undef + store volatile <32 x i32> %val0, ptr addrspace(1) undef, align 8 + store volatile i32 %val1, ptr addrspace(1) undef ret void } @@ -2832,39 +2829,38 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: 
$sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 @@ -2873,14 +2869,14 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void() %val0 = extractvalue { i32, <32 x i32> } %val, 0 %val1 = extractvalue { i32, <32 x i32> } %val, 1 - store volatile i32 %val0, i32 addrspace(1)* undef - store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef, align 8 + store volatile i32 %val0, ptr addrspace(1) undef + store volatile <32 x i32> %val1, ptr addrspace(1) undef, align 8 ret void } @@ -2937,14 +2933,14 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_void() - store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 + store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 ret void } -define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { +define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr 
addrspace(1) %p, i32 %idx) #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -2961,10 +2957,10 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 @@ -3006,10 +3002,10 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) - store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 + %val = call <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) + store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index b32e8211f05e2..8fb54005c07f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 @@ -21,34 +21,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5) + ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5) ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C3]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = 
G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -56,37 +55,37 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) - %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 - %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %in.gep0 - store i32 8, i32 addrspace(5)* %in.gep1 - call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val) - %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0 - %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, 
i32 0, i32 1 - %out.val0 = load i8, i8 addrspace(5)* %out.gep0 - %out.val1 = load i32, i32 addrspace(5)* %out.gep1 - store volatile i8 %out.val0, i8 addrspace(1)* undef - store volatile i32 %out.val1, i32 addrspace(1)* undef + %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 + %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %in.gep0 + store i32 8, ptr addrspace(5) %in.gep1 + call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val) + %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0 + %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1 + %out.val0 = load i8, ptr addrspace(5) %out.gep0 + %out.val1 = load i32, ptr addrspace(5) %out.gep1 + store volatile i8 %out.val0, ptr addrspace(1) undef + store volatile i32 %out.val1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 0948411064af5..b77b6609b0f8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -29,8 +29,8 @@ declare hidden void @external_void_func_i48(i48) #0 declare hidden void @external_void_func_i48_signext(i48 signext) #0 declare hidden void @external_void_func_i48_zeroext(i48 zeroext) #0 -declare hidden void @external_void_func_p0(i8*) #0 -declare hidden void @external_void_func_v2p0(<2 x i8*>) #0 +declare hidden void @external_void_func_p0(ptr) #0 +declare hidden void @external_void_func_v2p0(<2 x ptr>) #0 declare hidden void @external_void_func_f16(half) #0 declare hidden void @external_void_func_f32(float) #0 @@ -62,13 +62,13 @@ declare hidden void @external_void_func_v8i32(<8 x i32>) #0 declare hidden void @external_void_func_v16i32(<16 x i32>) #0 declare hidden void @external_void_func_v32i32(<32 x i32>) #0 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 -declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, i8 addrspace(3)*, i8 addrspace(5)*) #0 +declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, ptr addrspace(3), ptr addrspace(5)) #0 declare hidden void @external_void_func_v32i32_i8_i8_i16(<32 x i32>, i8, i8, i16) #0 ; Structs declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0 -declare hidden void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 -declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 +declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0 +declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 declare hidden void @external_void_func_v2i8(<2 x i8>) #0 declare hidden void @external_void_func_v3i8(<3 x i8>) #0 @@ -76,7 +76,7 @@ declare hidden void @external_void_func_v4i8(<4 x i8>) #0 declare hidden void @external_void_func_v8i8(<8 x i8>) #0 declare hidden void @external_void_func_v16i8(<16 x i8>) #0 -declare hidden void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0 +declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0 declare 
hidden void @stack_passed_f64_arg(<32 x i32>, double) #0 declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 @@ -391,7 +391,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -429,7 +429,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call void @external_void_func_i1_signext(i1 signext %var) ret void } @@ -451,7 +451,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -489,7 +489,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call void @external_void_func_i1_zeroext(i1 zeroext %var) ret void } @@ -570,7 +570,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile 
"amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -609,7 +609,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call void @external_void_func_i8_signext(i8 signext %var) ret void } @@ -631,7 +631,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -670,7 +670,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call void @external_void_func_i8_zeroext(i8 zeroext %var) ret void } @@ -749,7 +749,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -787,7 +787,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit 
$vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call void @external_void_func_i16_signext(i16 signext %var) ret void } @@ -809,7 +809,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -847,7 +847,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call void @external_void_func_i16_zeroext(i16 zeroext %var) ret void } @@ -1021,7 +1021,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1062,7 +1062,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i64>, <2 x i64> addrspace(1)* null + %val = load <2 x i64>, ptr addrspace(1) null call void @external_void_func_v2i64(<2 x i64> %val) ret void } @@ -1146,7 +1146,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = 
COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1186,7 +1186,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48(i48 %var) ret void } @@ -1208,7 +1208,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1248,7 +1248,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48_signext(i48 signext %var) ret void } @@ -1270,7 +1270,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1310,12 +1310,12 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48_zeroext(i48 zeroext %var) ret void } -define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { +define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { ; CHECK-LABEL: name: test_call_external_void_func_p0_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -1331,7 +1331,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1370,7 +1370,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - call void @external_void_func_p0(i8* %arg) + call void @external_void_func_p0(ptr %arg) ret void } @@ -1390,7 +1390,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1431,8 +1431,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, 
csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i8*>, <2 x i8*> addrspace(1)* null - call void @external_void_func_v2p0(<2 x i8*> %val) + %val = load <2 x ptr>, ptr addrspace(1) null + call void @external_void_func_v2p0(<2 x ptr> %val) ret void } @@ -1455,7 +1455,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 @@ -1499,7 +1499,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> call void @external_void_func_v3i64(<3 x i64> %val) @@ -1525,7 +1525,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 @@ -1571,7 +1571,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, 
implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> call void @external_void_func_v4i64(<4 x i64> %val) ret void @@ -2079,7 +2079,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2116,7 +2116,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef call void @external_void_func_v2i16(<2 x i16> %val) ret void } @@ -2137,7 +2137,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2179,7 +2179,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef call void @external_void_func_v3i16(<3 x i16> %val) ret void } @@ -2200,7 +2200,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2242,7 +2242,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <3 x half>, <3 x half> addrspace(1)* undef + %val = load <3 x half>, ptr addrspace(1) undef call void @external_void_func_v3f16(<3 x half> %val) ret void } @@ -2263,7 +2263,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2302,7 +2302,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef call void @external_void_func_v4i16(<4 x i16> %val) ret void } @@ -2385,7 +2385,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2428,7 +2428,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = 
G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <5 x i16>, <5 x i16> addrspace(1)* undef + %val = load <5 x i16>, ptr addrspace(1) undef call void @external_void_func_v5i16(<5 x i16> %val) ret void } @@ -2449,7 +2449,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2493,7 +2493,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <7 x i16>, <7 x i16> addrspace(1)* undef + %val = load <7 x i16>, ptr addrspace(1) undef call void @external_void_func_v7i16(<7 x i16> %val) ret void } @@ -2514,7 +2514,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2589,7 +2589,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, 
implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <63 x i16>, <63 x i16> addrspace(1)* undef + %val = load <63 x i16>, ptr addrspace(1) undef call void @external_void_func_v63i16(<63 x i16> %val) ret void } @@ -2610,7 +2610,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2688,7 +2688,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <65 x i16>, <65 x i16> addrspace(1)* undef + %val = load <65 x i16>, ptr addrspace(1) undef call void @external_void_func_v65i16(<65 x i16> %val) ret void } @@ -2709,7 +2709,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2784,7 +2784,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, 
implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <66 x i16>, <66 x i16> addrspace(1)* undef + %val = load <66 x i16>, ptr addrspace(1) undef call void @external_void_func_v66i16(<66 x i16> %val) ret void } @@ -2805,7 +2805,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2842,7 +2842,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x half>, <2 x half> addrspace(1)* undef + %val = load <2 x half>, ptr addrspace(1) undef call void @external_void_func_v2f16(<2 x half> %val) ret void } @@ -2863,7 +2863,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2902,7 +2902,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; 
CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef call void @external_void_func_v2i32(<2 x i32> %val) ret void } @@ -3111,7 +3111,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3152,7 +3152,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef call void @external_void_func_v4i32(<4 x i32> %val) ret void } @@ -3303,7 +3303,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 @@ -3349,8 +3349,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v8i32(<8 x i32> %val) ret void } @@ -3443,7 +3443,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: 
[[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 @@ -3497,8 +3497,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v16i32(<16 x i32> %val) ret void } @@ -3519,7 +3519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 @@ -3592,8 +3592,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: 
ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v32i32(<32 x i32> %val) ret void } @@ -3616,9 +3616,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3693,9 +3693,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i32, i32 addrspace(1)* undef + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load i32, ptr addrspace(1) undef call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) ret void } @@ -3717,31 +3717,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD 
[[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), 
[[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3752,10 +3751,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY21]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) @@ -3790,25 +3789,25 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i8, i8 addrspace(1)* undef - %val2 = load i8, i8 addrspace(1)* undef - %val3 = load i16, i16 addrspace(1)* undef + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load i8, ptr addrspace(1) undef + %val2 = load i8, ptr addrspace(1) undef + %val3 = load i16, ptr addrspace(1) undef call void @external_void_func_v32i32_i8_i8_i16(<32 x i32> %val0, i8 %val1, i8 %val2, i16 %val3) ret void } @@ -3830,31 +3829,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3898,25 +3896,25 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, 
implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* undef - %val2 = load i8 addrspace(5)*, i8 addrspace(5)* addrspace(1)* undef - call void @external_void_func_v32i32_p3_p5(<32 x i32> %val0, i8 addrspace(3)* %val1, i8 addrspace(5)* %val2) + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load ptr addrspace(3), ptr addrspace(1) undef + %val2 = load ptr addrspace(5), ptr addrspace(1) undef + call void @external_void_func_v32i32_p3_p5(<32 x i32> %val0, ptr addrspace(3) %val1, ptr addrspace(5) %val2) ret void } @@ -3936,7 +3934,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -3980,8 +3978,8 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -3990,7 +3988,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -4006,8 +4004,8 @@ define amdgpu_gfx void 
@test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -4016,7 +4014,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -4032,8 +4030,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) ret void } @@ -4058,7 +4056,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.val, addrspace 5) ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 @@ -4101,17 +4099,17 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = alloca { i8, i32 }, align 4, addrspace(5) - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 - %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %gep0 - store i32 8, i32 addrspace(5)* %gep1 - call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val) + %gep0 = getelementptr inbounds { i8, i32 }, ptr 
addrspace(5) %val, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %gep0 + store i32 8, ptr addrspace(5) %gep1 + call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val) ret void } -declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %arg0, i8 addrspace(5)* byval(i8) align 32 %arg1, i32 %arg2) #0 +declare void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %arg0, ptr addrspace(5) byval(i8) align 32 %arg1, i32 %arg2) #0 -define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { +define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr addrspace(5) align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -4163,15 +4161,15 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN - call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) + call void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %incoming0, ptr addrspace(5) align 32 %incoming1, i32 999) ret void } -declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %arg0) #0 +declare void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %arg0) #0 ; Make sure we are aware of the higher alignment of the incoming value ; than implied by the outgoing byval alignment in the memory operand. 
-define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { +define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -4216,7 +4214,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN - call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) + call void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4236,7 +4234,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 @@ -4280,8 +4278,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef - %val = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <2 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v2i8(<2 x i8> %val) ret void } @@ -4302,7 +4300,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 @@ -4349,8 +4347,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef - %val = load <3 x i8>, <3 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <3 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v3i8(<3 x i8> %val) ret void } @@ -4371,7 +4369,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 @@ -4421,8 +4419,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef - %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v4i8(<4 x i8> %val) ret void } @@ -4443,7 +4441,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 @@ -4505,8 +4503,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef - %val = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v8i8(<8 x i8> %val) ret void } @@ -4527,7 +4525,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 @@ -4613,8 +4611,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v16i8(<16 x i8> %val) ret void } @@ -4635,10 +4633,10 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD 
[[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 44bf8ce235d6f..1d5490bec296f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -12,10 +12,10 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 entry: %add = add <4 x i64> zeroinitializer, zeroinitializer - store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32 + store <4 x i64> %add, ptr addrspace(1) null, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 138e66ffd6c7a..6a0975f7fa72e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -17,7 +17,7 @@ define i32 @test() { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) + ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (ptr @var, ptr inttoptr (i32 -1 to ptr)) to i32) to <1 x i32>), i64 0)> to i32) } @gint = external addrspace(1) global i8, align 4 @@ -34,9 +34,9 @@ define amdgpu_kernel void @constantexpr_select_0() { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4 + store i32 select (i1 icmp eq (ptr addrspace(1) @gint, ptr addrspace(1) null), i32 1, i32 0), ptr addrspace(1) undef, align 4 ret void } @@ -51,9 +51,9 @@ define amdgpu_kernel void @constantexpr_select_1() { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, 
addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4 + store i32 select (i1 icmp eq (ptr addrspace(1) @gint, ptr addrspace(1) inttoptr (i64 1024 to ptr addrspace(1))), i32 1, i32 0), ptr addrspace(1) undef, align 4 ret void } @@ -74,5 +74,5 @@ define i32 @test_fcmp_constexpr() { ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: - ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) + ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (ptr getelementptr inbounds ([2 x i32], ptr @a, i64 0, i64 1), ptr @var) to float), float 0.000000e+00) to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll index a3ffff28a9d8e..ccd186e11c44f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll @@ -5,7 +5,7 @@ ; CHECK: nnan G_FADD define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) { %res = fadd nnan float %arg0, %arg1 - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } @@ -14,7 +14,7 @@ define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) { ; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { %res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2) - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } @@ -23,7 +23,7 @@ define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { ; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %{{[0-9]+}}(s32) define amdgpu_kernel void @rcp_nsz(float %arg0) { %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0) - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index af0dd868c9092..34d59ea1e608b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -43,8 +43,8 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; FIXED-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 8adc0ac7009b6..34860d46d832e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -12,9 +12,9 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -25,9 +25,9 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -39,9 +39,9 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i1 %arg0, i1 addrspace(1)* undef + store i1 %arg0, ptr addrspace(1) undef ret void } @@ -57,11 +57,11 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -77,11 +77,11 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -104,7 +104,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb2: @@ -114,7 +114,7 @@ bb: br i1 %arg, label %bb2, label %bb1 bb1: - store volatile 
i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: @@ -130,9 +130,9 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 %arg0, i8 addrspace(1)* undef + store i8 %arg0, ptr addrspace(1) undef ret void } @@ -148,11 +148,11 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -168,11 +168,11 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -184,9 +184,9 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i16 %arg0, i16 addrspace(1)* undef + store i16 %arg0, ptr addrspace(1) undef ret void } @@ -202,11 +202,11 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -222,11 +222,11 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -238,9 +238,9 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -253,9 +253,9 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -268,9 +268,9 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -281,9 +281,9 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } @@ -295,9 +295,9 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } @@ -309,22 +309,22 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { +define void @void_func_p3i8(ptr addrspace(3) %arg0) #0 { ; CHECK-LABEL: name: void_func_p3i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef + store ptr addrspace(3) %arg0, ptr addrspace(1) undef ret void } @@ -338,9 +338,9 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i48 %arg0, i48 addrspace(1)* undef + store i48 %arg0, ptr addrspace(1) undef ret void } @@ -357,11 +357,11 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i48 %arg0 to i64 %add = add i64 %ext, 12 - store i64 %add, i64 addrspace(1)* undef + store i64 %add, ptr addrspace(1) undef ret void } @@ -378,11 +378,11 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i48 %arg0 to i64 %add = add i64 %ext, 12 - store i64 %add, i64 addrspace(1)* undef + store i64 %add, ptr addrspace(1) undef ret void } @@ -395,9 +395,9 @@ define void @void_func_i64(i64 %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i64 %arg0, i64 addrspace(1)* undef + store i64 %arg0, ptr addrspace(1) undef ret void } @@ -412,9 +412,9 @@ define void @void_func_i95(i95 %arg0) #0 { ; 
CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i95 %arg0, i95 addrspace(1)* undef + store i95 %arg0, ptr addrspace(1) undef ret void } @@ -432,11 +432,11 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i95 %arg0 to i96 %add = add i96 %ext, 12 - store i96 %add, i96 addrspace(1)* undef + store i96 %add, ptr addrspace(1) undef ret void } @@ -454,11 +454,11 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i95 %arg0 to i96 %add = add i96 %ext, 12 - store i96 %add, i96 addrspace(1)* undef + store i96 %add, ptr addrspace(1) undef ret void } @@ -472,13 +472,13 @@ define void @void_func_i96(i96 %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i96 %arg0, i96 addrspace(1)* undef + store i96 %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p0i8(i8* %arg0) #0 { +define void @void_func_p0i8(ptr %arg0) #0 { ; CHECK-LABEL: name: void_func_p0i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -487,13 +487,13 @@ define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8* %arg0, i8* addrspace(1)* undef + store ptr %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { +define void @void_func_p1i8(ptr addrspace(1) %arg0) #0 { ; CHECK-LABEL: name: void_func_p1i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -502,9 +502,9 
@@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef + store ptr addrspace(1) %arg0, ptr addrspace(1) undef ret void } @@ -516,9 +516,9 @@ define void @void_func_f16(half %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store half %arg0, half addrspace(1)* undef + store half %arg0, ptr addrspace(1) undef ret void } @@ -529,9 +529,9 @@ define void @void_func_f32(float %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store float %arg0, float addrspace(1)* undef + store float %arg0, ptr addrspace(1) undef ret void } @@ -544,9 +544,9 @@ define void @void_func_f64(double %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store double %arg0, double addrspace(1)* undef + store double %arg0, ptr addrspace(1) undef ret void } @@ -559,9 +559,9 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef + store <2 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -575,9 +575,9 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: 
(store (<2 x s24>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef + store <2 x i24> %arg0, ptr addrspace(1) undef ret void } @@ -592,9 +592,9 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef + store <3 x i24> %arg0, ptr addrspace(1) undef ret void } @@ -610,9 +610,9 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef + store <2 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -630,9 +630,9 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef + store <3 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -652,13 +652,13 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef + store <4 x i8> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { +define void @void_func_v2p3i8(<2 x ptr addrspace(3)> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p3i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -667,9 +667,9 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), 
[[COPY1]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef + store <2 x ptr addrspace(3)> %arg0, ptr addrspace(1) undef ret void } @@ -683,9 +683,9 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef + store <3 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -700,9 +700,9 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef + store <4 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -718,9 +718,9 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef + store <5 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -739,9 +739,9 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef + store <8 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -768,9 +768,9 @@ define void @void_func_v16i32(<16 x i32> %arg0) 
#0 { ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef + store <16 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -814,9 +814,9 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store <32 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -863,9 +863,9 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef + store <33 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -882,13 +882,13 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef + store <2 x i64> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { +define void @void_func_v2p0i8(<2 x ptr> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p0i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -901,13 +901,13 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef + store <2 x ptr> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { +define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p1i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -920,9 +920,9 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef + store <2 x ptr addrspace(1)> %arg0, ptr addrspace(1) undef ret void } @@ -942,9 +942,9 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef + store <3 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -967,9 +967,9 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), 
[[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef + store <4 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -995,9 +995,9 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef + store <5 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1032,9 +1032,9 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef + store <8 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1094,9 +1094,9 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef + store <16 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1107,9 +1107,9 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef + store <2 x i16> %arg0, ptr 
addrspace(1) undef ret void } @@ -1124,9 +1124,9 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef + store <3 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1139,9 +1139,9 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef + store <4 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1157,9 +1157,9 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef + store <5 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1174,9 +1174,9 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef + store <8 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1195,9 +1195,9 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), 
[[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef + store <16 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1247,9 +1247,9 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE 
[[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef + store <65 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1262,9 +1262,9 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x float> %arg0, <2 x float> addrspace(1)* undef + store <2 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1278,9 +1278,9 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x float> %arg0, <3 x float> addrspace(1)* undef + store <3 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1295,9 +1295,9 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x float> %arg0, <4 x float> addrspace(1)* undef + store <4 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1316,9 +1316,9 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x float> %arg0, <8 x float> addrspace(1)* undef + store <8 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1345,9 +1345,9 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x float> %arg0, <16 x float> addrspace(1)* undef + store <16 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1364,9 +1364,9 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x double> %arg0, <2 x double> addrspace(1)* undef + store <2 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1386,9 +1386,9 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x double> %arg0, <3 x double> addrspace(1)* undef + store <3 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1411,9 +1411,9 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x double> %arg0, <4 x double> addrspace(1)* undef + store <4 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1448,9 +1448,9 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x double> 
%arg0, <8 x double> addrspace(1)* undef + store <8 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1510,9 +1510,9 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x double> %arg0, <16 x double> addrspace(1)* undef + store <16 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1523,9 +1523,9 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x half> %arg0, <2 x half> addrspace(1)* undef + store <2 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1540,9 +1540,9 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x half> %arg0, <3 x half> addrspace(1)* undef + store <3 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1555,9 +1555,9 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x half> %arg0, <4 x half> addrspace(1)* undef + store <4 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1572,9 +1572,9 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE 
[[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x half> %arg0, <8 x half> addrspace(1)* undef + store <8 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1593,9 +1593,9 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x half> %arg0, <16 x half> addrspace(1)* undef + store <16 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1611,14 +1611,13 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile i32 %arg0, i32 addrspace(1)* undef - store volatile i64 %arg1, i64 addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(1)* undef + store volatile i32 %arg0, ptr addrspace(1) undef + store volatile i64 %arg1, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(1) undef ret void } @@ -1629,9 +1628,9 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { i32 } %arg0, { i32 } addrspace(1)* undef + store { i32 } %arg0, ptr addrspace(1) undef ret void } @@ -1645,16 +1644,16 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ 
i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef + store { i8, i32 } %arg0, ptr addrspace(1) undef ret void } -define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { +define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 @@ -1664,17 +1663,17 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 + store { i8, i32 } %arg0.load, ptr addrspace(1) undef ret void } -define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { +define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -1693,24 +1692,24 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN - %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 - store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef - store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(3)* undef + %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 + %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 + store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef + store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(3) undef ret void } -define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { +define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1718,20 +1717,19 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load i32, i32 addrspace(5)* %arg0 - %arg1.load = load i64, i64 addrspace(5)* %arg1 - store i32 %arg0.load, i32 addrspace(1)* undef - store i64 %arg1.load, i64 addrspace(1)* undef + %arg0.load = load i32, ptr addrspace(5) %arg0 + 
%arg1.load = load i64, ptr addrspace(5) %arg1 + store i32 %arg0.load, ptr addrspace(1) undef + store i64 %arg1.load, ptr addrspace(1) undef ret void } -define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { +define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1739,21 +1737,20 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load i8, i8 addrspace(5)* %arg0 - %arg1.load = load i16, i16 addrspace(5)* %arg1 - store i8 %arg0.load, i8 addrspace(1)* null - store i16 %arg1.load, i16 addrspace(1)* null + %arg0.load = load i8, ptr addrspace(5) %arg0 + %arg1.load = load i16, ptr addrspace(5) %arg1 + store i8 %arg0.load, ptr addrspace(1) null + store i16 %arg1.load, ptr addrspace(1) null ret void } ; Make sure the alignment is taken from the correct parameter. 
-define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { +define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x i32]) align 128 %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1761,7 +1758,6 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) @@ -1770,24 +1766,24 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store (s32) into `ptr addrspace(1) null` + 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(p1) = G_CONSTANT i64 8 - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `ptr addrspace(1) null` + 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 - %arg1.load = load i16, i16 addrspace(5)* %arg1 - store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null - store i16 %arg1.load, i16 addrspace(1)* null + %arg0.load = load [3 x i32], ptr addrspace(5) %arg0 + %arg1.load = load i16, ptr addrspace(5) %arg1 + store [3 x i32] %arg0.load, ptr addrspace(1) null + store i16 %arg1.load, ptr addrspace(1) null ret void } ; byval argument after non-byval stack passed argument -define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { +define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addrspace(5) byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, 
$vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -1831,19 +1827,18 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* null - %arg2.load = load i8, i8 addrspace(5)* %arg2 - store i8 %arg2.load, i8 addrspace(1)* null + store i32 %arg1, ptr addrspace(1) null + %arg2.load = load i8, ptr addrspace(5) %arg2 + store i8 %arg2.load, ptr addrspace(1) null ret void } ; byval argument before non-byval stack passed argument -define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { +define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -1887,14 +1882,13 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg2, i32 addrspace(1)* null - %arg1.load = load i8, i8 addrspace(5)* %arg1 - store i8 %arg1.load, i8 addrspace(1)* null + store i32 %arg2, ptr addrspace(1) null + %arg1.load = load i8, ptr addrspace(5) %arg1 + store i8 %arg1.load, ptr addrspace(1) null ret void } @@ -1945,15 +1939,13 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: 
(invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i32 %arg1, i32 addrspace(1)* undef - store volatile i64 %arg2, i64 addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i32 %arg1, ptr addrspace(1) undef + store volatile i64 %arg2, ptr addrspace(1) undef ret void } @@ -2008,25 +2000,21 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i1 %arg1, i1 addrspace(1)* undef - store volatile i8 %arg2, i8 addrspace(1)* undef - store volatile i16 %arg3, 
i16 addrspace(1)* undef - store volatile half %arg4, half addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i1 %arg1, ptr addrspace(1) undef + store volatile i8 %arg2, ptr addrspace(1) undef + store volatile i16 %arg3, ptr addrspace(1) undef + store volatile half %arg4, ptr addrspace(1) undef ret void } -define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { +define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1, ptr addrspace(5) %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -2070,15 +2058,13 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[DEF]](p1) :: (volatile store (p3) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[DEF]](p1) :: (volatile store (p5) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef - store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile ptr addrspace(3) %arg1, ptr addrspace(1) undef + store volatile ptr addrspace(5) %arg2, ptr addrspace(1) undef ret void } @@ -2132,15 +2118,13 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), 
[[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef - store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i32> %arg1, ptr addrspace(1) undef + store volatile <2 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2188,15 +2172,13 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef - store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i16> %arg1, ptr addrspace(1) undef + store volatile <2 x half> %arg2, ptr addrspace(1) undef ret void } @@ -2262,15 +2244,13 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x 
double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef - store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i64> %arg1, ptr addrspace(1) undef + store volatile <2 x double> %arg2, ptr addrspace(1) undef ret void } @@ -2332,15 +2312,13 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef - store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <4 x i32> %arg1, ptr addrspace(1) undef + store volatile <4 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2418,15 +2396,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: 
(volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef - store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <8 x i32> %arg1, ptr addrspace(1) undef + store volatile <8 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2536,15 +2512,13 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef - store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i32> %arg1, ptr addrspace(1) undef + store volatile <16 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2563,22 +2537,21 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; 
CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 - store volatile float %arg0.0, float addrspace(3)* undef - store volatile float %arg0.1, float addrspace(3)* undef - store volatile float %arg0.2, float addrspace(3)* undef - store volatile i32 %arg1, i32 addrspace(3)* undef + store volatile float %arg0.0, ptr addrspace(3) undef + store volatile float %arg0.1, ptr addrspace(3) undef + store volatile float %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } @@ -2599,18 +2572,18 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 - store volatile i32 %arg0.0, i32 addrspace(3)* undef - store volatile i32 %arg0.1, i32 addrspace(3)* undef - store volatile i32 %arg0.2, i32 addrspace(3)* undef - store volatile i32 %arg1, i32 
addrspace(3)* undef + store volatile i32 %arg0.0, ptr addrspace(3) undef + store volatile i32 %arg0.1, ptr addrspace(3) undef + store volatile i32 %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } @@ -2655,9 +2628,9 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef + store volatile <16 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -2736,16 +2709,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i8> %arg1, ptr addrspace(1) undef ret void } -define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { +define void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) { ; CHECK-LABEL: name: pointer_in_struct_argument ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 @@ -2762,28 +2734,25 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY 
[[C]](p1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN - %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 - %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 - %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 - %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1 - store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null - ret void -} - -define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) { + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[C]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[C]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[C]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN + %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 + %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1 + %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0 + %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1 + store volatile ptr addrspace(3) %val0, ptr addrspace(1) null + store volatile ptr addrspace(1) %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile ptr addrspace(3) %val2, ptr addrspace(1) null + store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null + ret void +} + +define void @vector_ptr_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) { ; CHECK-LABEL: name: vector_ptr_in_struct_arg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -2799,12 +2768,12 @@ define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspac ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, 
addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef + store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll index 4611f170bf2fd..22035a5ac1756 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll @@ -2,6 +2,6 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -stop-after=irtranslator -verify-machineinstrs -o - %s define void @void_func_v2i65(<2 x i65> %arg0) #0 { - store <2 x i65> %arg0, <2 x i65> addrspace(1)* undef + store <2 x i65> %arg0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index 2e660884e7db1..2321cca252b83 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator -o - %s | FileCheck %s ; Test 64-bit pointer with 64-bit index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64(<2 x ptr addrspace(1)> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -32,12 +32,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 32-bit pointer with 32-bit index -define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) { +define <2 x ptr addrspace(3)> @vector_gep_v2p3_index_v2i32(<2 x ptr addrspace(3)> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -57,12 +57,12 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx - ret <2 x i32 addrspace(3)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(3)> %ptr, <2 x i32> %idx + 
ret <2 x ptr addrspace(3)> %gep } ; Test 64-bit pointer with 32-bit index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i32(<2 x ptr addrspace(1)> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -89,12 +89,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i32> %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 64-bit scalar index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i64(<2 x ptr addrspace(1)> %ptr, i64 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -122,12 +122,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i64 %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 32-bit scalar index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i32(<2 x ptr addrspace(1)> %ptr, i32 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 @@ -153,12 +153,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i32 %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 64-bit constant, non-splat -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64_constant(<2 x ptr addrspace(1)> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -192,6 +192,6 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> 
%ptr, <2 x i64> + ret <2 x ptr addrspace(1)> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 3e8197462e313..72cf3a08e7ffb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s -define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { +define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -56,7 +56,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ret void } -define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { +define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 89235e3f05de2..ac12b4f19bcc0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -197,7 +197,7 @@ entry: ret float %0 } -define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { +define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -207,7 +207,7 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* elementtype(i32) %a) + %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) ret i32 %1 } @@ -265,17 +265,17 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), 
[[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 - store i32 %asmresult0, i32 addrspace(1)* undef + store i32 %asmresult0, ptr addrspace(1) undef %asmresult1 = extractvalue {i32, i32, i32} %asm, 1 - store i32 %asmresult1, i32 addrspace(1)* undef + store i32 %asmresult1, ptr addrspace(1) undef %asmresult2 = extractvalue {i32, i32, i32} %asm, 2 - store i32 %asmresult2, i32 addrspace(1)* undef + store i32 %asmresult2, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll index 45db580e15458..7c7a600c4bda2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -54,7 +54,7 @@ define { i32, i64 } @load_const_struct_gv() { ret { i32, i64 } %load } -define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memcpy_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memcpy_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -68,11 +68,11 @@ define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } -define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memcpy_inline_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memcpy_inline_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -86,11 +86,11 @@ define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrs ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.inline.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } -define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memmove_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memmove_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -104,7 +104,7 @@ define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index 8e90bd593a108..7fae9f8bb8cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -3,7 +3,7 @@ ; Size operand should be the minimum of the two pointer sizes. -define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -16,11 +16,11 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) + call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void } -define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -32,11 +32,11 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) + call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void } -define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -49,11 +49,11 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) + call void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void } -define void 
@test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i64(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -66,11 +66,11 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) + call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) %dst, ptr addrspace(1) %src, i64 256, i1 false) ret void } -define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i32(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -82,11 +82,11 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) + call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 256, i1 false) ret void } -define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i16(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -99,11 +99,11 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) + call void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) %dst, ptr addrspace(1) %src, i16 256, i1 false) ret void } -define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -116,11 +116,11 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) + call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void } -define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i32 ; CHECK: bb.1 
(%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -132,11 +132,11 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) + call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void } -define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -149,11 +149,11 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) + call void @llvm.memmove.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void } -define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i64(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -166,11 +166,11 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) + call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 256, i1 false) ret void } -define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i32(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -184,11 +184,11 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) + call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 256, i1 false) ret void } -define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i16(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -202,11 +202,11 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) + call void @llvm.memset.p1.i16(ptr addrspace(1) %dst, i8 %val, i16 256, i1 false) 
ret void } -define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i64(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -218,11 +218,11 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) + call void @llvm.memset.p3.i64(ptr addrspace(3) %dst, i8 %val, i64 256, i1 false) ret void } -define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i32(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -233,11 +233,11 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) + call void @llvm.memset.p3.i32(ptr addrspace(3) %dst, i8 %val, i32 256, i1 false) ret void } -define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i16(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -249,25 +249,25 @@ define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) + call void @llvm.memset.p3.i16(ptr addrspace(3) %dst, i8 %val, i16 256, i1 false) ret void } -declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg) #1 -declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* 
nocapture writeonly, i8, i32, i1 immarg) #1 -declare void @llvm.memset.p1i8.i16(i8 addrspace(1)* nocapture writeonly, i8, i16, i1 immarg) #1 -declare void @llvm.memset.p3i8.i64(i8 addrspace(3)* nocapture writeonly, i8, i64, i1 immarg) #1 -declare void @llvm.memset.p3i8.i32(i8 addrspace(3)* nocapture writeonly, i8, i32, i1 immarg) #1 -declare void @llvm.memset.p3i8.i16(i8 addrspace(3)* nocapture writeonly, i8, i16, i1 immarg) #1 +declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i16(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #1 +declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p1.i16(ptr addrspace(1) nocapture writeonly, i8, i16, i1 immarg) #1 +declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg) #1 +declare void @llvm.memset.p3.i32(ptr addrspace(3) nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p3.i16(ptr addrspace(3) nocapture writeonly, i8, i16, i1 immarg) #1 attributes #0 = { argmemonly nounwind willreturn } attributes #1 = { argmemonly nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll index dab8b64e75f45..c182cff9329f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - -stop-after=irtranslator %s | FileCheck %s ; Check that the CSEMIRBuilder doesn't fold away the getelementptr during IRTranslator -define i8 addrspace(7)* @no_auto_constfold_gep() { +define ptr addrspace(7) @no_auto_constfold_gep() { ; CHECK-LABEL: name: no_auto_constfold_gep ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p7) = G_CONSTANT i64 0 @@ -12,12 +12,12 @@ define i8 addrspace(7)* @no_auto_constfold_gep() { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %gep = getelementptr i8, i8 addrspace(7)* null, i64 123 - ret i8 addrspace(7)* %gep + %gep = getelementptr i8, ptr addrspace(7) null, i64 123 + ret 
ptr addrspace(7) %gep } ; Check that the CSEMIRBuilder doesn't fold away the getelementptr during IRTranslator -define <2 x i8 addrspace(7)*> @no_auto_constfold_gep_vector() { +define <2 x ptr addrspace(7)> @no_auto_constfold_gep_vector() { ; CHECK-LABEL: name: no_auto_constfold_gep_vector ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p7) = G_CONSTANT i64 0 @@ -32,6 +32,6 @@ define <2 x i8 addrspace(7)*> @no_auto_constfold_gep_vector() { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i8, <2 x i8 addrspace(7)*> zeroinitializer, <2 x i64> - ret <2 x i8 addrspace(7)*> %gep + %gep = getelementptr i8, <2 x ptr addrspace(7)> zeroinitializer, <2 x i64> + ret <2 x ptr addrspace(7)> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index af0daceac6c61..8eb0658f8023b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stop-after=irtranslator < %s | FileCheck %s -define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { +define ptr @ptrmask_flat_i64(ptr %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -17,11 +17,11 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { +define ptr @ptrmask_flat_i32(ptr %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -35,11 +35,11 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { +define ptr @ptrmask_flat_i16(ptr %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -54,11 +54,11 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i16(ptr %ptr, i16 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { +define ptr @ptrmask_flat_i1(ptr %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -73,11 +73,11 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i1(ptr %ptr, i1 %mask) + ret ptr %masked } -define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { +define ptr addrspace(3) @ptrmask_local_i64(ptr addrspace(3) %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_local_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -89,11 +89,11 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { +define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_local_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -103,11 +103,11 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { +define ptr addrspace(3) @ptrmask_local_i16(ptr addrspace(3) %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_local_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -118,11 +118,11 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { +define ptr addrspace(3) @ptrmask_local_i1(ptr addrspace(3) %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_local_i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -133,21 +133,21 @@ define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3) %ptr, i1 %mask) + ret ptr addrspace(3) %masked } ; Seems to not work -; define <2 x i8*> @ptrmask_flat_i64_v2(<2 x i8*> %ptr, <2 x i64> %mask) { -; %masked = call <2 x i8*> @llvm.ptrmask.v2p0i8.v2i64(<2 x i8*> %ptr, <2 x i64> %mask) -; ret <2 x i8*> %masked +; define <2 x ptr> @ptrmask_flat_i64_v2(<2 x ptr> %ptr, <2 x i64> 
%mask) { +; %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> %mask) +; ret <2 x ptr> %masked ; } -declare i8* @llvm.ptrmask.p0i8.i64(i8*, i64) -declare i8* @llvm.ptrmask.p0i8.i32(i8*, i32) -declare i8* @llvm.ptrmask.p0i8.i16(i8*, i16) -declare i8* @llvm.ptrmask.p0i8.i1(i8*, i1) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)*, i1) +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) +declare ptr @llvm.ptrmask.p0.i32(ptr, i32) +declare ptr @llvm.ptrmask.p0.i16(ptr, i16) +declare ptr @llvm.ptrmask.p0.i1(ptr, i1) +declare ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3), i64) +declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) +declare ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3), i16) +declare ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3), i1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll index 127430f569e7c..2eab2a458e11e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll @@ -9,9 +9,9 @@ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg) define amdgpu_kernel void @getreg_callsite_attributes() { %reg0 = call i32 @llvm.amdgcn.s.getreg(i32 0) - store volatile i32 %reg0, i32 addrspace(1)* undef + store volatile i32 %reg0, ptr addrspace(1) undef %reg1 = call i32 @llvm.amdgcn.s.getreg(i32 0) #1 - store volatile i32 %reg1, i32 addrspace(1)* undef + store volatile i32 %reg1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 20208f1336625..967979e76dd8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -32,8 +32,8 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %add0 = add i32 %arg0, %arg1 ret i32 %add0 } @@ -78,8 +78,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) ret i32 %ret } @@ -105,8 +105,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN-NEXT: SI_TCRETURN [[GV]](p0), 
@i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b) ret i32 %ret } @@ -140,11 +140,9 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32) @@ -160,7 +158,7 @@ entry: ret void } -define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 { +define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_byval_i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 @@ -172,13 +170,13 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 - %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 + %arg1.load = load i32, ptr addrspace(5) %arg1, align 4 %add0 = add i32 %arg0, %arg1.load ret i32 %add0 } ; Tail call disallowed with byval in parent. 
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { +define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 @@ -203,7 +201,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3 ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: - %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) %b.byval) ret i32 %ret } @@ -255,13 +253,13 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: - %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) inttoptr (i32 16 to ptr addrspace(5))) ret i32 %ret } @@ -498,8 +496,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) ret i32 %ret } @@ -693,8 +691,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: SI_TCRETURN 
[[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) ret i32 %ret } @@ -798,13 +796,13 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) ret i32 %ret } -declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) align 16, [2 x i64] addrspace(5)* byval([2 x i64])) +declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval @@ -965,13 +963,13 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) %alloca1 = alloca [2 x i64], align 8, addrspace(5) - store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca0 - store [2 x i64] zeroinitializer, [2 x i64] addrspace(5)* %alloca1 - tail call fastcc void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) %alloca0, [2 x i64] addrspace(5)* byval([2 x i64]) %alloca1) + store [3 x i32] [i32 9, i32 9, i32 9], ptr addrspace(5) %alloca0 + store [2 x i64] zeroinitializer, ptr addrspace(5) %alloca1 + tail call fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) %alloca0, ptr addrspace(5) byval([2 x i64]) %alloca1) ret void } -declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* 
byval([3 x i32]) align 16, [32 x i32], i32) +declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) ; Callee has a byval and non-byval stack passed argument define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { @@ -1159,8 +1157,8 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) - store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca - tail call fastcc void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* byval([3 x i32]) %alloca, [32 x i32] zeroinitializer, i32 %stack.out.arg) + store [3 x i32] [i32 9, i32 9, i32 9], ptr addrspace(5) %alloca + tail call fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) %alloca, [32 x i32] zeroinitializer, i32 %stack.out.arg) ret void } @@ -1213,9 +1211,9 @@ entry: ret i64 %ret } -declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0) +declare hidden fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %arg0) -define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { +define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -1258,8 +1256,8 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspac ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: - %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) - ret i8 addrspace(1)* %ret + %ret = tail call fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %a) + ret ptr addrspace(1) %ret } declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index 403375d6389f8..cb81871be3a58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -11,17 +11,16 @@ define 
amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) - ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 %extract1 = extractvalue { float, i1 } %call, 1 %ext = sext i1 %extract1 to i32 - store float %extract0, float addrspace(1)* undef - store i32 %ext, i32 addrspace(1)* undef + store float %extract0, ptr addrspace(1) undef + store i32 %ext, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll index 72c0adcd91ccf..ea0376afe8ec0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -28,7 +28,7 @@ define void @func_use_lds_global() { ; GFX9-NEXT: ds_write_b32 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - store float 0.0, float addrspace(3)* @lds, align 4 + store float 0.0, ptr addrspace(3) @lds, align 4 ret void } @@ -50,6 +50,6 @@ define void @func_use_lds_global_constexpr_cast() { ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll index 7bf9e3fcda308..2f718814ef77b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll @@ -5,7 +5,7 @@ @lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 @lds_4_8 = addrspace(3) global i32 undef, align 8 -define amdgpu_kernel void @use_lds_globals(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 { +define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 { ; CHECK-LABEL: use_lds_globals: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -23,11 +23,11 @@ define amdgpu_kernel void @use_lds_globals(i32 addrspace(1)* %out, i32 addrspace ; CHECK-NEXT: ds_write_b32 v0, v3 ; CHECK-NEXT: s_endpgm entry: - %tmp0 = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_512_4, i32 0, i32 1 - %tmp1 = load i32, i32 addrspace(3)* %tmp0 - %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - store i32 %tmp1, i32 addrspace(1)* %tmp2 - store i32 9, i32 addrspace(3)* @lds_4_8 + %tmp0 = getelementptr [128 x i32], ptr 
addrspace(3) @lds_512_4, i32 0, i32 1 + %tmp1 = load i32, ptr addrspace(3) %tmp0 + %tmp2 = getelementptr i32, ptr addrspace(1) %out, i32 1 + store i32 %tmp1, ptr addrspace(1) %tmp2 + store i32 9, ptr addrspace(3) @lds_4_8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll index fc4c3f710c28f..2921246d9adef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll @@ -10,17 +10,16 @@ ; GCN-LABEL: test_local_misaligned_v2: ; GCN-DAG: ds_{{read2|load_2addr}}_b32 ; GCN-DAG: ds_{{write2|store_2addr}}_b32 -define amdgpu_kernel void @test_local_misaligned_v2(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v2(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* - %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <2 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <2 x i32> %load, i32 0 %v2 = extractelement <2 x i32> %load, i32 1 %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 - store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 4 + store <2 x i32> %v4, ptr addrspace(3) %gep, align 4 ret void } @@ -31,12 +30,11 @@ bb: ; ALIGNED-DAG: ds_{{write2|store_2addr}}_b32 ; UNALIGNED-DAG: ds_{{read2|load_2addr}}_b64 ; UNALIGNED-DAG: ds_{{write2|store_2addr}}_b64 -define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v4(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <4 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <4 x i32> %load, i32 0 %v2 = extractelement <4 x i32> %load, i32 1 %v3 = extractelement <4 x i32> %load, i32 2 @@ -45,7 +43,7 @@ bb: %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 - store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 4 + store <4 x i32> %v8, ptr addrspace(3) %gep, align 4 ret void } @@ -56,55 +54,52 @@ bb: ; ALIGNED-DAG: ds_{{write|store}}_b32 ; UNALIGNED-DAG: ds_{{read|load}}_b96 ; UNALIGNED-DAG: ds_{{write|store}}_b96 -define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v3(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <3 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <3 x i32> %load, i32 0 %v2 = extractelement <3 x i32> %load, i32 1 %v3 = extractelement <3 x i32> %load, i32 2 %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 %v6 = insertelement <3 x i32> %v5, 
i32 %v1, i32 1 %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 - store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 4 + store <3 x i32> %v7, ptr addrspace(3) %gep, align 4 ret void } ; GCN-LABEL: test_local_aligned_v2: ; GCN-DAG: ds_{{read|load}}_b64 ; GCN-DAG: ds_{{write|store}}_b64 -define amdgpu_kernel void @test_local_aligned_v2(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_aligned_v2(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* - %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 8 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <2 x i32>, ptr addrspace(3) %gep, align 8 %v1 = extractelement <2 x i32> %load, i32 0 %v2 = extractelement <2 x i32> %load, i32 1 %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 - store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 8 + store <2 x i32> %v4, ptr addrspace(3) %gep, align 8 ret void } ; GCN-LABEL: test_local_aligned_v3: ; GCN-DAG: ds_{{read|load}}_b96 ; GCN-DAG: ds_{{write|store}}_b96 -define amdgpu_kernel void @test_local_aligned_v3(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <3 x i32>, ptr addrspace(3) %gep, align 16 %v1 = extractelement <3 x i32> %load, i32 0 %v2 = extractelement <3 x i32> %load, i32 1 %v3 = extractelement <3 x i32> %load, i32 2 %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 - store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 16 + store <3 x i32> %v7, ptr addrspace(3) %gep, align 16 ret void } @@ -117,12 +112,11 @@ bb: ; ALIGNED-CU-DAG: ds_{{write2|store_2addr}}_b64 ; UNALIGNED-DAG: ds_{{read2|load_2addr}}_b64 ; UNALIGNED-DAG: ds_{{write2|store_2addr}}_b64 -define amdgpu_kernel void @test_local_v4_aligned8(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_v4_aligned8(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <4 x i32>, ptr addrspace(3) %gep, align 8 %v1 = extractelement <4 x i32> %load, i32 0 %v2 = extractelement <4 x i32> %load, i32 1 %v3 = extractelement <4 x i32> %load, i32 2 @@ -131,7 +125,7 @@ bb: %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 - store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 8 + store <4 x i32> %v8, ptr addrspace(3) %gep, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll index 1f76655d935f6..cd536e2336cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll @@ -14,11 +14,11 @@ ; GCN: .amdgpu_lds lds.defined, 32, 8 define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) #0 { main_body: - %gep0 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @lds.external, i32 0, i32 %arg1 - %tmp = load i32, i32 addrspace(3)* %gep0 + %gep0 = getelementptr [0 x i32], ptr addrspace(3) @lds.external, i32 0, i32 %arg1 + %tmp = load i32, ptr addrspace(3) %gep0 - %gep1 = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds.defined, i32 0, i32 %wave - store i32 123, i32 addrspace(3)* %gep1 + %gep1 = getelementptr [8 x i32], ptr addrspace(3) @lds.defined, i32 0, i32 %wave + store i32 123, ptr addrspace(3) %gep1 %r = bitcast i32 %tmp to float ret float %r diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll index 4f45c34bfb53d..156b6b77571ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -8,7 +8,7 @@ @lds = addrspace(3) global [256 x i32] zeroinitializer -define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) { +define amdgpu_kernel void @load_zeroinit_lds_global(ptr addrspace(1) %out, i1 %p) { ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.1 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 @@ -31,8 +31,8 @@ define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 % ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX9: FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 - %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 - %ld = load i32, i32 addrspace(3)* %gep - store i32 %ld, i32 addrspace(1)* %out + %gep = getelementptr [256 x i32], ptr addrspace(3) @lds, i32 0, i32 10 + %ld = load i32, ptr addrspace(3) %gep + store i32 %ld, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index e86e224bc2b32..a8296dbeb2177 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -3278,7 +3278,7 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3305,7 +3305,7 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3316,7 +3316,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3347,7 +3347,7 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3379,7 +3379,7 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3391,7 +3391,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3429,7 +3429,7 @@ define amdgpu_ps <4 x 
float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3468,7 +3468,7 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3481,7 +3481,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3521,7 +3521,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3562,7 +3562,7 @@ define 
amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3576,7 +3576,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index cbf4c9bcefc50..d52653262d35d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -244,7 +244,7 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 @@ -266,13 +266,13 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; 
PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret half %tex } @@ -303,7 +303,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 @@ -326,13 +326,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -356,7 +356,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -394,7 +394,7 @@ define 
amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -408,7 +408,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -432,7 +432,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -469,14 +469,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -959,7 +959,7 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 @@ -981,13 +981,13 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret half %tex } @@ -1018,7 +1018,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 @@ -1041,13 +1041,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; 
PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -1078,7 +1078,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 @@ -1101,13 +1101,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -1131,7 +1131,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1166,7 +1166,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1178,7 +1178,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1202,7 +1202,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1235,7 +1235,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1247,7 +1247,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = 
extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1271,7 +1271,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1304,7 +1304,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1316,7 +1316,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1340,7 +1340,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1377,14 +1377,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1408,7 +1408,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1444,14 +1444,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1475,7 +1475,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; 
UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1509,14 +1509,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1540,7 +1540,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1574,14 +1574,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), 
[[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index e20e567fae039..46470e44805b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -126,13 +126,13 @@ define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 %tfe = extractvalue { float, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } @@ -156,14 +156,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -187,7 +187,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s ; 
GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -195,7 +195,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -219,7 +219,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -228,7 +228,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -484,13 +484,13 @@ define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; 
GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 %tfe = extractvalue { float, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } @@ -515,14 +515,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -547,14 +547,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -579,7 +579,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; 
GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -587,7 +587,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -612,7 +612,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -620,7 +620,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -645,7 +645,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -653,7 +653,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -678,7 +678,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -687,7 +687,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -712,7 +712,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -721,7 +721,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -746,7 +746,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -755,7 +755,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = 
extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -780,7 +780,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -789,7 +789,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll index 8421dcf991c94..b5da2f92fdea7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -57,7 +57,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: name: load_2darraymsaa_tfe ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -118,7 +118,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 ad %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll index 8e41a41219776..a62d1b5321409 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -68,7 +68,7 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 @@ -90,13 +90,13 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %val, 0 %tfe = extractvalue { float, i32 } %val, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll index 474d2e4c8418d..9fa0376197ecd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll @@ -6,17 +6,17 @@ ; register bank copies, and no return optimization is missing. 
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 -define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -59,12 +59,12 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -108,13 +108,13 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind { ; GCN-LABEL: lds_atomic_dec_noret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -151,11 +151,11 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun ; VI-NEXT: v_mov_b32_e32 v1, s0 ; VI-NEXT: ds_dec_u32 v1, v0 ; VI-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) 
%ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -184,12 +184,12 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: ds_dec_rtn_u32 v0, v0, v1 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -230,12 +230,12 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -282,13 +282,13 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -317,11 +317,11 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void 
@global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -356,12 +356,12 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -428,15 +428,15 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out.gep + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -486,13 +486,13 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -533,12 +533,12 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 
0, i1 false) - store i32 %result, i32* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -585,13 +585,13 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -620,11 +620,11 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -659,12 +659,12 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -731,15 +731,15 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* ; GFX9-NEXT: flat_store_dword v[2:3], v0 ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %out.gep = getelementptr i32, i32* %out, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out.gep + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %out.gep = getelementptr i32, ptr %out, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out.gep ret void } -define amdgpu_kernel void 
@flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -789,13 +789,13 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -839,12 +839,12 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -894,13 +894,13 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -932,11 +932,11 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -974,12 +974,12 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 
v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1049,15 +1049,15 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %out.gep = getelementptr i64, i64* %out, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out.gep + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %out.gep = getelementptr i64, ptr %out, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1110,15 +1110,15 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @lds0 = internal addrspace(3) global [512 x i32] undef -define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1171,14 +1171,14 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 ; GFX9-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i32 %val0, i32 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel 
void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -1224,12 +1224,12 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -1276,13 +1276,13 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1313,11 +1313,11 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1349,12 +1349,12 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: 
global_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1398,12 +1398,12 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1453,13 +1453,13 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1491,11 +1491,11 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1533,12 +1533,12 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x0 @@ -1608,15 +1608,15 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out.gep + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1669,15 +1669,15 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } @lds1 = internal addrspace(3) global [512 x i64] undef, align 8 -define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1733,10 +1733,10 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, ; GFX9-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i64 %val0, i64 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i64 %val0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll index d041f2e296b1d..1eddf8666e6a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -9,17 +9,17 @@ ; register bank copies, and no 
return optimization is missing. -declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 -define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -89,8 +89,8 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 + store i32 %result, ptr addrspace(1) %out ret void } @@ -98,7 +98,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad !1 = distinct !{!1, !2} !2 = distinct !{!2} -define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -168,13 +168,13 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -220,11 +220,11 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun ; GFX11-NEXT: v_dual_mov_b32 v1, 
42 :: v_dual_mov_b32 v0, s0 ; GFX11-NEXT: ds_inc_u32 v0, v1 ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -270,12 +270,12 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, s0 ; GFX11-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -336,12 +336,12 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -406,13 +406,13 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -459,11 +459,11 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) n ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 
@llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -514,12 +514,12 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -595,15 +595,15 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out.gep + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -661,15 +661,15 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } @lds0 = internal addrspace(3) global [512 x i32] undef, align 4 -define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* 
%add_use) #0 { +define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -745,14 +745,14 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, ; GFX11-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i32 %val0, i32 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -827,12 +827,12 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ad ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -907,13 +907,13 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -964,11 +964,11 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 
0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1019,12 +1019,12 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1090,12 +1090,12 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1165,13 +1165,13 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1223,11 +1223,11 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) n ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void 
@global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1283,12 +1283,12 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1369,15 +1369,15 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out.gep + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1440,13 +1440,13 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #0 { ; GCN-LABEL: flat_atomic_inc_ret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1473,12 +1473,12 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { ; GFX11-NEXT: flat_store_b32 v[0:1], 
v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1553,13 +1553,13 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) nounwind { ; GCN-LABEL: flat_atomic_inc_noret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1579,11 +1579,11 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1639,12 +1639,12 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1746,15 +1746,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %out.gep = getelementptr i32, i32* %out, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out.gep + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %out.gep = getelementptr i32, ptr %out, 
i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1827,15 +1827,15 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @lds1 = internal addrspace(3) global [512 x i64] undef, align 8 -define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1916,14 +1916,14 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, ; GFX11-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i64 %val0, i64 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i64 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #0 { ; GCN-LABEL: flat_atomic_inc_ret_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1952,12 +1952,12 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -2037,13 +2037,13 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm 
- %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) nounwind { ; GCN-LABEL: flat_atomic_inc_noret_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2065,11 +2065,11 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2130,12 +2130,12 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -2243,15 +2243,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %out.gep = getelementptr i64, i64* %out, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out.gep + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %out.gep = getelementptr i64, ptr %out, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2329,13 +2329,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 
@llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: nocse_lds_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s6, s[4:5], 0x4 @@ -2422,11 +2422,11 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, ; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) - %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result0, i32 addrspace(1)* %out0 - store i32 %result1, i32 addrspace(1)* %out1 + store i32 %result0, ptr addrspace(1) %out0 + store i32 %result1, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll index 0121d27138a72..278e9b03e84fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll @@ -9,9 +9,9 @@ declare i64 @llvm.amdgcn.dispatch.id() #1 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 { %tmp0 = call i64 @llvm.amdgcn.dispatch.id() - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll index 38505220392b0..b8cfe23a78802 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll @@ -5,14 +5,13 @@ ; GCN-LABEL: {{^}}test: ; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -define amdgpu_kernel void @test(i32 addrspace(1)* %out) { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %value = load i32, i32 addrspace(4)* %header_ptr - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) { + %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 + %value = load i32, ptr addrspace(4) %dispatch_ptr + store i32 %value, ptr addrspace(1) %out ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 +declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 99ef58147896c..d8705bffe7e90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -295,7 +295,7 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double ret double %result } -define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -409,11 +409,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 @@ -516,11 +516,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 @@ -623,11 +623,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -730,11 +730,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, 
align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) { +define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 @@ -843,11 +843,11 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) - store double %result, double addrspace(1)* %out, align 8 + store double %result, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) { +define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 @@ -956,11 +956,11 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX11_W64-NEXT: s_endpgm %cmp = icmp eq i32 %i, 0 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { +define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -1060,11 +1060,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { +define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -1164,11 +1164,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], i32 %d) { +define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 @@ -1326,25 +1326,25 @@ define amdgpu_kernel void 
@test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 - %a = load volatile float, float addrspace(1)* %gep.a - %b = load volatile float, float addrspace(1)* %gep.b - %c = load volatile float, float addrspace(1)* %gep.c + %a = load volatile float, ptr addrspace(1) %gep.a + %b = load volatile float, ptr addrspace(1) %gep.b + %c = load volatile float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 %cmp1 = icmp ne i32 %d, 0 %and = and i1 %cmp0, %cmp1 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, [8 x i32], float addrspace(1)* %in, [8 x i32], i32 addrspace(1)* %dummy) { +define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa @@ -1536,27 +1536,27 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX11_W64-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 - %a = load float, float addrspace(1)* %gep.a - %b = load float, float addrspace(1)* %gep.b - %c = load float, float addrspace(1)* %gep.c + %a = load float, ptr addrspace(1) %gep.a + %b = load float, ptr addrspace(1) %gep.b + %c = load float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %bb, label %exit bb: - %val = load i32, i32 addrspace(1)* %dummy + %val = load i32, ptr addrspace(1) %dummy %cmp1 = icmp ne i32 %val, 0 br label %exit exit: %cond = phi i1 [false, %entry], [%cmp1, %bb] - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll index d75b0a8f10081..bc186982968d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -march=amdgcn 
-mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -75,19 +75,19 @@ define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -157,19 +157,19 @@ define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f64_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -245,19 +245,19 @@ define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, doubl ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - 
%b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f64_2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f64_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -333,19 +333,19 @@ define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, doubl ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - %b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], float %a) { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], float %a) { ; GFX7-LABEL: test_div_scale_f32_scalar_num_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -408,17 +408,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) { ; GFX7-LABEL: test_div_scale_f32_scalar_num_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -481,17 +481,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = 
getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) { ; GFX7-LABEL: test_div_scale_f32_scalar_den_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -554,17 +554,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) { ; GFX7-LABEL: test_div_scale_f32_scalar_den_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -627,17 +627,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %a) { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %a) { ; GFX7-LABEL: test_div_scale_f64_scalar_num_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -701,17 +701,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* 
%out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %a) { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %a) { ; GFX7-LABEL: test_div_scale_f64_scalar_num_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -775,17 +775,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_scalar_den_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -849,17 +849,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_scalar_den_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -923,17 +923,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) { +define amdgpu_kernel void 
@test_div_scale_f32_all_scalar_1(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) { ; GFX7-LABEL: test_div_scale_f32_all_scalar_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c @@ -986,11 +986,11 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* % ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) { +define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) { ; GFX7-LABEL: test_div_scale_f32_all_scalar_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c @@ -1043,11 +1043,11 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* % ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_all_scalar_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x1d @@ -1102,11 +1102,11 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_all_scalar_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x1d @@ -1161,11 +1161,11 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_inline_imm_num: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1224,16 +1224,16 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, 
align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_inline_imm_den: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1292,16 +1292,16 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_fabs_num(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_fabs_num: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1376,21 +1376,21 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %a.fabs = call float @llvm.fabs.f32(float %a) %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_fabs_den: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1465,21 +1465,21 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = 
getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %b.fabs = call float @llvm.fabs.f32(float %b) %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_val_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1522,11 +1522,11 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_undef_val_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1569,11 +1569,11 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_undef_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1614,11 +1614,11 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1) ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f64_val_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1667,7 +1667,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll index 75de30e5ddf54..c614f1b6b7aa3 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s -define amdgpu_ps float @ds_fadd_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fadd_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -36,11 +36,11 @@ define amdgpu_ps float @ds_fadd_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX11-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float @ds_fadd_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fadd_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -72,12 +72,12 @@ define amdgpu_ps float @ds_fadd_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX11-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fadd_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fadd_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -105,11 +105,11 @@ define amdgpu_ps void @ds_fadd_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: ds_add_f32 v0, v1 ; GFX11-NEXT: s_endpgm - %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -137,12 +137,12 @@ define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: ds_add_f32 v1, v0 offset:512 ; GFX11-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fadd(ptr 
addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fadd_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -165,11 +165,11 @@ define float @ds_fadd_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fadd_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -192,12 +192,12 @@ define float @ds_fadd_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fadd_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fadd_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -220,11 +220,11 @@ define void @ds_fadd_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fadd_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fadd_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,12 +247,12 @@ define void @ds_fadd_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fadd_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -275,10 +275,10 @@ define float @ds_fadd_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: 
s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fadd(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index 8a11ff509d2a2..e4c4f42b137ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -8,7 +8,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s -define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmax_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -48,11 +48,11 @@ define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmax_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -92,12 +92,12 @@ define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmax_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -133,11 +133,11 @@ define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 - %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, 
i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -173,12 +173,12 @@ define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -212,11 +212,11 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -250,12 +250,12 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmax_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -287,11 +287,11 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call 
float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmax_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -323,12 +323,12 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -362,10 +362,10 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll index 29e2e6e4ac96c..b824511c3f5c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s -define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmin_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -36,11 +36,11 @@ define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX11-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float 
@ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmin_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -72,12 +72,12 @@ define amdgpu_ps float @ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX11-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmin_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -105,11 +105,11 @@ define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: ds_min_f32 v0, v1 ; GFX11-NEXT: s_endpgm - %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -137,12 +137,12 @@ define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: ds_min_f32 v1, v0 offset:512 ; GFX11-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -165,11 +165,11 @@ define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -192,12 +192,12 @@ define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmin_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -220,11 +220,11 @@ define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmin_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,12 +247,12 @@ define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -275,10 +275,10 @@ define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index fd2251bff0b1c..2f558181bf613 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -47,12 +47,12 @@ entry: br i1 %cond, label %mid, label %bb mid: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb bb: call void @llvm.amdgcn.end.cf.i32(i32 %saved) - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll index 1e39c6395ca7d..81d8472ebd46e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll @@ -25,12 +25,12 @@ entry: br i1 %cond, label %mid, label %bb mid: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb bb: call void @llvm.amdgcn.end.cf.i64(i64 %saved) - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll index 419bdb2076cca..f4a3234c73ee6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11 -define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) { +define i32 @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -19,11 +19,11 @@ define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data) + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %data) ret i32 %ret } -define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) { +define i32 @global_atomic_csub_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,12 +50,12 @@ define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret i32 %ret } -define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define void @global_atomic_csub_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -71,11 +71,11 @@ define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data) + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %data) ret void } -define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define void @global_atomic_csub_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { 
; GFX10-LABEL: global_atomic_csub_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -102,12 +102,12 @@ define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret void } -define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_sgpr_base_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 @@ -133,13 +133,13 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* ; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) - store i32 %ret, i32 addrspace(1)* undef + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) + store i32 %ret, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_sgpr_base_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 @@ -161,12 +161,12 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspa ; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret void } -declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 +declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1 attributes #0 = { nounwind willreturn } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll index 5681953cb3a62..ba9d5378c2767 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll @@ -1,21 +1,21 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s ; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1v2f16.v2f16(<2 x half> 
addrspace(1)* nocapture, <2 x half>) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.global.atomic.fadd) ; GFX90A-LABEL: {{^}}global_atomic_fadd_f32_rtn: ; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc -define float @global_atomic_fadd_f32_rtn(float addrspace(1)* %ptr, float %data) { - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) +define float @global_atomic_fadd_f32_rtn(ptr addrspace(1) %ptr, float %data) { + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } ; GFX90A-LABEL: {{^}}global_atomic_fadd_v2f16_rtn: ; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc -define <2 x half> @global_atomic_fadd_v2f16_rtn(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) +define <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll index 4af59846cbb51..752ddbb896c6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s -define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -16,11 +16,11 @@ define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) { ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32_off_2048(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_2048: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -34,12 +34,12 @@ define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %da ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 512 + %ret = call 
float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32_off_neg2047(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -53,12 +53,12 @@ define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 -511 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %ptr, float %data) { +define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_ss: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dword s2, s[4:5], 0x8 @@ -78,12 +78,12 @@ define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %pt ; GFX90A-NEXT: v_mov_b32_e32 v0, s2 ; GFX90A-NEXT: global_atomic_add_f32 v1, v0, s[0:1] offset:2048 ; GFX90A-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 512 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define void @global_atomic_fadd_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: global_atomic_fadd_v2f16: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -97,11 +97,11 @@ define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> ; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define void @global_atomic_fadd_v2f16_off_neg2047(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: global_atomic_fadd_v2f16_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -115,12 +115,12 @@ define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, ; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %gep, <2 x half> %data) + %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -511 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %gep, <2 x half> 
%data) ret void } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #0 -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0 +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll index cc1cf733efa6e..a1cfcc6c3f4fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -38,7 +38,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { entry: %cond = icmp eq i32 %arg0, 0 %break = call i32 @llvm.amdgcn.if.break.i32(i1 %cond, i32 %saved) - store volatile i32 %break, i32 addrspace(1)* undef + store volatile i32 %break, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll index 7d2a57643bc3f..9718cef5c6db0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) { entry: %cond = icmp eq i32 %arg0, 0 %break = call i64 @llvm.amdgcn.if.break.i64(i1 %cond, i64 %saved) - store volatile i64 %break, i64 addrspace(1)* undef + store volatile i64 %break, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll index 9b77132581fd6..8e02052d7a905 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) { +define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) { ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -119,11 +119,11 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) { +define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) { ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -207,7 +207,7 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 
%s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll index 13677ae7b5e68..cc248964e9914 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll @@ -40,7 +40,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -128,11 +128,11 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %r %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -220,7 +220,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll index 2e3ecd03b9f66..6a7e847528898 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -124,11 +124,11 @@ define amdgpu_ps <4 x float> 
@load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %r %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -217,7 +217,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll index d6413e956b9da..f831b118285a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll @@ -38,7 +38,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) { ; GFX9-LABEL: load_3d_v4f32_xyzw_tfe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -123,11 +123,11 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) { ; GFX9-LABEL: load_3d_v4f32_xyzw_tfe_lwe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -212,7 +212,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll index ab0b3bb83a36f..f22877e3c62e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) { ; GFX6-LABEL: load_3d_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -121,11 +121,11 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) { ; GFX6-LABEL: load_3d_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -211,7 +211,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll index 2028b8abe9606..7d693d8a1bb29 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll @@ -5,13 +5,12 @@ ; GCN-LABEL: {{^}}test_ps: ; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0 define amdgpu_ps i32 @test_ps() #1 { - %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() - %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %buffer_ptr + %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() + %value = load volatile i32, ptr addrspace(4) %implicit_buffer_ptr ret i32 %value } -declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll index 56d818ed4e0f7..329e6fbae47ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll @@ -73,7 +73,7 @@ main_body: ret void } -define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg %m0) #0 
{ +define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 { ; GCN-LABEL: v_interp_f32_many_vm: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4 @@ -99,10 +99,10 @@ define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg ; GCN-NEXT: exp mrt0 v6, v7, v8, v0 done ; GCN-NEXT: s_endpgm main_body: - %i.ptr = getelementptr float, float addrspace(1)* %ptr, i32 1 - %i = load float, float addrspace(1)* %i.ptr, align 4 - %j.ptr = getelementptr float, float addrspace(1)* %ptr, i32 2 - %j = load float, float addrspace(1)* %j.ptr, align 4 + %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1 + %i = load float, ptr addrspace(1) %i.ptr, align 4 + %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2 + %j = load float, ptr addrspace(1) %j.ptr, align 4 %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0) %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 576d71879a3d9..463b59a4fd9f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -605,7 +605,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ret <4 x float> %r } -define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 @@ -702,10 +702,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid - %node_ptr = load i32, i32* %gep_node_ptr, align 4 - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid + %node_ptr = load i32, ptr %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -716,11 +716,11 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1 %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> 
inreg %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 @@ -842,10 +842,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid - %node_ptr = load i32, i32* %gep_node_ptr, align 4 - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid + %node_ptr = load i32, ptr %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -856,11 +856,11 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1 %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_clause 0x1 @@ -956,8 +956,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -968,11 +968,11 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1 %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_clause 0x1 @@ -1094,8 +1094,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -1106,6 +1106,6 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1 %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 34b07a1d48e5d..b2e4d6787b74c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -6,7 +6,7 @@ ; TODO: Merge with DAG test -define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_private_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -70,15 +70,15 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { ; CI-LABEL: is_private_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -140,11 +140,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { ; GFX11-NEXT: .LBB1_2: ; %bb1 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %val = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -152,6 +152,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.private(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 8aee6d71e20ce..048fb5586ac0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -6,7 +6,7 @@ ; 
TODO: Merge with DAG test -define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_local_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -70,15 +70,15 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { ; CI-LABEL: is_local_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -140,11 +140,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { ; GFX11-NEXT: .LBB1_2: ; %bb1 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -152,6 +152,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll index ae04ab5ca9a71..3c049b4ee0ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll @@ -10,12 +10,11 @@ ; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa -define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 { + %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -27,12 +26,11 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15 -define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 
addrspace(1)* %out +define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -47,11 +45,10 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -66,11 +63,10 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -82,11 +78,10 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out ; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}} ; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]] define amdgpu_kernel void @test_no_kernargs() #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store volatile i32 %value, i32 addrspace(1)* undef + %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store volatile i32 %value, ptr addrspace(1) undef ret void } @@ -95,10 +90,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 { ; OS-MESA3D: kernarg_segment_byte_size = 16 ; CO-V2: kernarg_segment_alignment = 4 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %arg.ptr - store volatile i32 %val, i32 addrspace(1)* null + %implicitarg.ptr = call noalias ptr addrspace(4) 
@llvm.amdgcn.implicitarg.ptr() + %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr + store volatile i32 %val, ptr addrspace(1) null ret void } @@ -107,23 +101,22 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() ; OS-MESA3D: kernarg_segment_byte_size = 16 ; CO-V2: kernarg_segment_alignment = 4 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %arg.ptr - store volatile i32 %val, i32 addrspace(1)* null + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr + store volatile i32 %val, ptr addrspace(1) null ret void } ; ALL-LABEL: {{^}}func_kernarg_segment_ptr: ; ALL: v_mov_b32_e32 v0, 0{{$}} ; ALL: v_mov_b32_e32 v1, 0{{$}} -define i8 addrspace(4)* @func_kernarg_segment_ptr() { - %ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - ret i8 addrspace(4)* %ptr +define ptr addrspace(4) @func_kernarg_segment_ptr() { + %ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + ret ptr addrspace(4) %ptr } -declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 -declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 attributes #0 = { nounwind readnone } attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll index c0b5f13139434..aecfbe7aa2260 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll @@ -10,7 +10,7 @@ declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x doubl declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32) declare i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_32x32x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[34:35], s[0:1], 0x24 @@ -70,15 +70,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[28:31], s[34:35] offset:112 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_16x16x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -116,15 +116,15 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1 ; GCN-NEXT: 
global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_4x4x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -146,15 +146,15 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* ; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_32x32x8bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -193,15 +193,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_16x16x16bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -224,15 +224,15 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_4x4x4f64: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 @@ 
-251,11 +251,11 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, dou bb: %mai.1 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double 0.0, i32 0, i32 0, i32 0) %mai.2 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %mai.1, i32 1, i32 2, i32 3) - store double %mai.2, double addrspace(1)* %arg + store double %mai.2, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 @@ -283,13 +283,13 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* % ; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x double>, <4 x double> addrspace(1)* %arg + %in.1 = load <4 x double>, ptr addrspace(1) %arg %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %in.1, i32 1, i32 2, i32 3) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_imm: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 @@ -310,11 +310,11 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrs bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) %mai.2 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %mai.1, i32 1, i32 2, i32 3) - store <4 x double> %mai.2, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.2, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_imm: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 @@ -345,11 +345,11 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1 ; GCN-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_lit: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 @@ -381,7 +381,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrs ; GCN-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> 
%mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll index 178b60463f1d5..820b5907569d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll @@ -5,7 +5,7 @@ ; FIXME: Merge with DAG test -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -43,10 +43,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0 - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { +define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) { ; GFX8-LABEL: mov_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -84,7 +84,7 @@ define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) #0 - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll index 4f51c551edf30..704e0a2a37e87 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll @@ -5,14 +5,13 @@ ; GCN-LABEL: {{^}}test: ; GCN: enable_sgpr_queue_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -define amdgpu_kernel void @test(i32 addrspace(1)* %out) { - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %value = load i32, i32 addrspace(4)* %header_ptr - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) { + %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 + %value = load i32, ptr addrspace(4) %queue_ptr + store i32 %value, ptr addrspace(1) %out ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 +declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll index 9120f53cb5565..4ac30eb5c28e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll @@ -11,7 +11,7 @@ declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i3 define amdgpu_kernel void @buffer_atomic_add_f32_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) { main_body: %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 
%voffset, i32 %soffset, i32 0) - store float %ret, float* undef + store float %ret, ptr undef ret void } @@ -20,6 +20,6 @@ main_body: define amdgpu_kernel void @buffer_atomic_add_v2f16_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { main_body: %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) - store <2 x half> %ret, <2 x half>* undef + store <2 x half> %ret, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 7ad408c1f547e..bf09de7d62e25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -5105,5 +5105,5 @@ declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) -declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) -declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) +declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg) +declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 32c80c8e086e5..a2aa740410ab9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 ret i64 %bfe_i32 } -define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_arg_imm: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -55,11 +55,11 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { +define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_arg_imm_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -73,11 +73,11 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_imm_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -92,11 +92,11 @@ define amdgpu_kernel void 
@bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 { +define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 { ; GFX6-LABEL: v_bfe_print_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -109,13 +109,13 @@ define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %src0, align 4 + %load = load i32, ptr addrspace(1) %src0, align 4 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -128,11 +128,11 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -145,11 +145,11 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -163,14 +163,14 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 
@@ -184,14 +184,14 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -205,14 +205,14 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -225,13 +225,13 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -244,13 +244,13 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -263,13 +263,13 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 
ret void } -define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -282,13 +282,13 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -302,13 +302,13 @@ define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = ashr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -322,13 +322,13 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = lshr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -340,11 +340,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -356,11 +356,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0) - store i32 %bfe_i32, i32 
addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -372,11 +372,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -388,11 +388,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -404,11 +404,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -421,11 +421,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -438,11 +438,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 @@ -455,11 +455,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -472,11 +472,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -489,11 +489,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -506,11 +506,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -523,11 +523,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -540,11 +540,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) 
%out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -557,11 +557,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -574,11 +574,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -591,11 +591,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -607,11 +607,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_17: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -624,11 +624,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_18: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -641,11 +641,11 @@ 
define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_sext_in_reg_i24: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -659,15 +659,15 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24) %shl = shl i32 %bfe, 8 %ashr = ashr i32 %shl, 8 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: simplify_demanded_bfe_sdiv: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0 @@ -701,14 +701,14 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm - %src = load i32, i32 addrspace(1)* %in, align 4 + %src = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) %div = sdiv i32 %bfe, 2 - store i32 %div, i32 addrspace(1)* %out, align 4 + store i32 %div, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_0_width: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -721,13 +721,13 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -741,14 +741,14 @@ define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr 
addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -762,15 +762,15 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1) ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr addrspace(1) %out, align 4 ret void } ; This really should be folded into 1 -define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_16_bfe_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -784,15 +784,15 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1) ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr addrspace(1) %out, align 4 ret void } ; Make sure there isn't a redundant BFE -define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -809,11 +809,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -830,11 +830,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -849,16 +849,16 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 
; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %ptr, align 1 + %load = load i8, ptr addrspace(1) %ptr, align 1 %sext = sext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -873,16 +873,16 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %ptr, align 1 + %load = load i8, ptr addrspace(1) %ptr, align 1 %sext = sext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -896,15 +896,15 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = ashr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -918,15 +918,15 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -940,11 +940,11 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr 
addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll index d9061eb009df7..95ede542ddd7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: set_inactive: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c @@ -16,11 +16,11 @@ define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GCN-NEXT: s_endpgm %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0 - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) { ; GCN-LABEL: set_inactive_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -36,11 +36,11 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0 - store i64 %tmp, i64 addrspace(1)* %out + store i64 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) { +define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) { ; GCN-LABEL: set_inactive_scc: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 @@ -83,12 +83,12 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 br i1 %cmp, label %.zero, label %.one .zero: - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out br label %.exit .one: %tmp.1 = add i32 %tmp, 1 - store i32 %tmp.1, i32 addrspace(1)* %out + store i32 %tmp.1, ptr addrspace(1) %out br label %.exit .exit: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll index 328f2e2b121bb..6d9f5d7143254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll @@ -12,7 +12,7 @@ declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x define amdgpu_kernel void @buffer_atomic_add_f32_rtn(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) { main_body: %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) - store float %ret, float* undef + store float %ret, ptr undef ret void } @@ -21,6 +21,6 @@ main_body: define amdgpu_kernel void 
@buffer_atomic_add_v2f16_rtn(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) { main_body: %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) - store <2 x half> %ret, <2 x half>* undef + store <2 x half> %ret, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index c13ff4c2cb2b2..a1b47b440d72e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -214,7 +214,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ret float %fval } -define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc, <4 x i32> addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_v4i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -245,15 +245,15 @@ define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { <4 x i32>, i32 } %load, 0 - store <4 x i32> %v, <4 x i32> addrspace(1)* %value + store <4 x i32> %v, ptr addrspace(1) %value %s = extractvalue { <4 x i32>, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } -define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc, <3 x i32> addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_v3i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -283,15 +283,15 @@ define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { <3 x i32>, i32 } %load, 0 - store <3 x i32> %v, <3 x i32> addrspace(1)* %value + store <3 x i32> %v, ptr addrspace(1) %value %s = extractvalue { <3 x i32>, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } -define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, i32 addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -318,10 +318,10 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { i32, i32 } %load, 0 - store i32 %v, i32 addrspace(1)* %value + store i32 %v, ptr 
addrspace(1) %value %s = extractvalue { i32, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll index 608746f71e18b..a5f5454cf6107 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll @@ -96,7 +96,7 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) - store volatile double %result, double* undef + store volatile double %result, ptr undef ret void } @@ -129,7 +129,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) - store volatile double %result, double* undef + store volatile double %result, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index c4fa11a15731c..43a0f018dc1cd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 ret i64 %bfe_i32 } -define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -56,11 +56,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_imm: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -74,11 +74,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_imm_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -92,11 +92,11 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } 
-define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_imm_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -111,11 +111,11 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -128,11 +128,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -145,11 +145,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zextload_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -163,15 +163,15 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in + %load = load i8, ptr addrspace(1) %in %ext = zext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } ; FIXME: Should be using s_add_i32 -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -186,15 +186,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 
@llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -209,15 +209,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 65535 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -232,15 +232,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -255,15 +255,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -278,15 +278,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 
%bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -301,15 +301,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 65535 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -322,13 +322,13 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -342,14 +342,14 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -363,14 +363,14 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -384,15 +384,15 @@ define 
amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = lshr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -406,15 +406,15 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = ashr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -428,14 +428,14 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -449,14 +449,14 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -470,14 +470,14 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 
%bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -490,13 +490,13 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -509,13 +509,13 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -528,13 +528,13 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -547,14 +547,14 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} -define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -568,13 +568,13 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 
addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = ashr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -588,13 +588,13 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = lshr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -606,11 +606,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -622,11 +622,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -638,11 +638,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -654,11 +654,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) ; GFX6-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -670,11 +670,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -687,11 +687,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -704,11 +704,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -721,11 +721,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -738,11 +738,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void 
@bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -755,11 +755,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -772,11 +772,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -789,11 +789,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -806,11 +806,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -823,11 +823,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -840,11 +840,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 
GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -857,11 +857,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -873,11 +873,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_17: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -890,11 +890,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_18: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -907,7 +907,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } @@ -915,7 +915,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) ; reduced to the bits demanded by the bfe. ; XXX: The operand to v_bfe_u32 could also just directly be the load register. 
-define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, +define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4 @@ -934,17 +934,17 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm - i32 addrspace(1)* %out1, - i32 addrspace(1)* %in) #0 { - %src = load i32, i32 addrspace(1)* %in, align 4 + ptr addrspace(1) %out1, + ptr addrspace(1) %in) #0 { + %src = load i32, ptr addrspace(1) %in, align 4 %and = and i32 %src, 63 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2) - store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4 - store i32 %and, i32 addrspace(1)* %out1, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out0, align 4 + store i32 %and, ptr addrspace(1) %out1, align 4 ret void } -define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: lshr_and: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -958,11 +958,11 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = lshr i32 %a, 6 %c = and i32 %b, 7 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: v_lshr_and: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -976,11 +976,11 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 ; GFX6-NEXT: s_endpgm %c = lshr i32 %a, %b %d = and i32 %c, 7 - store i32 %d, i32 addrspace(1)* %out, align 8 + store i32 %d, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -994,11 +994,11 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = and i32 %a, 448 %c = lshr i32 %b, 6 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -1012,11 +1012,11 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = and i32 %a, 511 %c = lshr i32 %b, 6 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: shl_lshr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -1030,7 +1030,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = shl i32 %a, 9 %c = lshr i32 %b, 11 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll index fc4f7f7d84a42..89f1c0b0f2a1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { +define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -39,10 +39,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) { +define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) { ; GFX8-LABEL: update_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -89,10 +89,10 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #1 - store i64 %tmp0, i64 addrspace(1)* %gep + store i64 %tmp0, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll index 65384954a58a2..9e0c6024837d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll @@ -10,7 +10,7 @@ declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im ; @llvm.amdgcn.wmma.f32.16x16x16.f16 -define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f32_16x16x16_f16: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -21,13 +21,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B ; W32-NEXT: s_endpgm bb: %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x float> %C) - store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32 + store <8 x float> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.f32.16x16x16.bf16 -define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) { +define 
amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f32_16x16x16_bf16: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -38,13 +38,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, ; W32-NEXT: s_endpgm bb: %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C) - store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32 + store <8 x float> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.f16.16x16x16.f16 -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f16_16x16x16_f16_lo: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -55,11 +55,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> ; W32-NEXT: s_endpgm bb: %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 0) - store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32 + store <16 x half> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f16_16x16x16_f16_hi: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1] @@ -70,13 +70,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> ; W32-NEXT: s_endpgm bb: %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 1) - store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32 + store <16 x half> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_lo: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -87,11 +87,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> ; W32-NEXT: s_endpgm bb: %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 0) - store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32 + store <16 x i16> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_hi: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1] @@ -102,13 +102,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> 
; W32-NEXT: s_endpgm bb: %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 1) - store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32 + store <16 x i16> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] @@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] @@ -134,11 +134,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] @@ -149,11 +149,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] @@ -164,11 +164,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define 
amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] clamp @@ -179,11 +179,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] clamp @@ -194,11 +194,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] clamp @@ -209,11 +209,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] clamp @@ -224,13 +224,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void 
@test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] @@ -241,11 +241,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] @@ -256,11 +256,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] @@ -271,11 +271,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] @@ -286,12 +286,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp @@ -302,11 +302,11 @@ define 
amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] clamp @@ -317,11 +317,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] clamp @@ -332,11 +332,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] clamp @@ -347,7 +347,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll index 51b5d6c464a2b..a12f13564fe99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll @@ -10,7 +10,7 @@ declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im ; @llvm.amdgcn.wmma.f32.16x16x16.f16 -define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; 
W64-LABEL: test_wmma_f32_16x16x16_f16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -19,13 +19,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f32.16x16x16.bf16 -define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f32_16x16x16_bf16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -34,13 +34,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f16.16x16x16.f16 -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -49,11 +49,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 0) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -62,13 +62,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 1) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -77,11 +77,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, 
i1 0) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -90,13 +90,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, i1 1) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] @@ -105,12 +105,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] @@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] @@ -132,11 +132,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void 
@test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] @@ -145,11 +145,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp @@ -158,11 +158,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] clamp @@ -171,11 +171,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] clamp @@ -184,11 +184,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 
v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] clamp @@ -197,13 +197,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] @@ -212,11 +212,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] @@ -225,11 +225,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] @@ -238,11 +238,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] @@ -251,11 +251,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> 
%C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp @@ -264,11 +264,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] clamp @@ -277,11 +277,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] clamp @@ -290,11 +290,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] clamp @@ -303,7 +303,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll index 3af3c59c0092a..148a35fb98cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll @@ -34,9 +34,9 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -61,9 +61,9 @@ define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -96,9 +96,9 @@ define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index fa246079e7d28..0ee242bd5c6c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -22,9 +22,9 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0 -define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -40,9 +40,9 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1 -define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -58,9 +58,9 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2 -define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 
%id, ptr addrspace(1) %out ret void } @@ -69,12 +69,12 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 ; ALL-NOT: v0 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 -define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 { %id0 = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id0, i32 addrspace(1)* %out + store volatile i32 %id0, ptr addrspace(1) %out %id1 = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id1, i32 addrspace(1)* %out + store volatile i32 %id1, ptr addrspace(1) %out ret void } @@ -83,15 +83,15 @@ define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 { ; ALL: {{flat|global}}_store_{{dword|b32}} ; ALL-NOT: v0 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 -define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(i32 addrspace(1)* %out, i32 %arg) #1 { +define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) #1 { bb0: - store volatile i32 0, i32 addrspace(1)* %out + store volatile i32 0, ptr addrspace(1) %out %cond = icmp eq i32 %arg, 0 br i1 %cond, label %bb1, label %bb2 bb1: %id = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id, i32 addrspace(1)* %out + store volatile i32 %id, ptr addrspace(1) %out br label %bb2 bb2: @@ -102,27 +102,27 @@ bb2: ; ALL: s_waitcnt ; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 ; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 -define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } ; ALL-LABEL: {{^}}test_workitem_id_y_func: ; HSA: v_bfe_u32 v2, v31, 10, 10 ; MESA: v_bfe_u32 v2, v31, 10, 10 -define void @test_workitem_id_y_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } ; ALL-LABEL: {{^}}test_workitem_id_z_func: ; HSA: v_bfe_u32 v2, v31, 20, 10 ; MESA: v_bfe_u32 v2, v31, 20, 10 -define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -139,13 +139,13 @@ define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 { ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 { +define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -161,13 +161,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, 
[[MASKED]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 { +define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -182,13 +182,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 20 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] -define amdgpu_kernel void @test_reqd_workgroup_size_z_only(i32* %out) !reqd_work_group_size !2 { +define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_group_size !2 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll index 8ff79e1184ce4..49502b3435609 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s -declare void @llvm.memcpy.inline.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1 immarg) +declare void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) -define amdgpu_cs void @test(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @test(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; GCN-LABEL: test: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s2, 0 @@ -25,6 +25,6 @@ define amdgpu_cs void @test(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; GCN-NEXT: s_endpgm - call void @llvm.memcpy.inline.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 4, i1 false) + call void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll index 52163982d1e2d..ef8db23aaa672 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1 immarg) +declare void 
@llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) -define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; LOOP-LABEL: memcpy_p1i8: ; LOOP: ; %bb.0: ; LOOP-NEXT: s_mov_b32 s2, 0 @@ -225,7 +225,7 @@ define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) ; UNROLL-NEXT: s_waitcnt vmcnt(0) ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19 ; UNROLL-NEXT: s_endpgm - call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 20, i1 false) + call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 20, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll index a1e40dd5da91d..c7a1e163c04aa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1) +declare void @llvm.memmove.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1) -define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @memmove_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; LOOP-LABEL: memmove_p1i8: ; LOOP: ; %bb.0: ; LOOP-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1] @@ -76,6 +76,6 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src ; UNROLL-NEXT: s_waitcnt vmcnt(3) ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; UNROLL-NEXT: s_endpgm - call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 4, i1 false) + call void @llvm.memmove.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll index cdc59777252ff..dd3f9f12111dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memset.p1i8.i32(i8 addrspace(1)*, i8, i32, i1) +declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1) -define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) { +define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) { ; LOOP-LABEL: memset_p1i8: ; LOOP: ; %bb.0: ; %loadstoreloop.preheader ; LOOP-NEXT: s_mov_b64 s[0:1], 0 @@ -34,6 +34,6 @@ define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) { ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:2 ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; UNROLL-NEXT: s_endpgm - call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 4, i1 false) + call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 
4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 98bc9815f001d..469e6ddad0a07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s -define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) { ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -112,11 +112,11 @@ define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v3 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v6, v5 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1 ret <3 x i32> %load } -define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) { ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -176,11 +176,11 @@ define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v7 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 ret <3 x i32> %load } -define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -197,11 +197,11 @@ define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load } -define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) { +define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_i96_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -218,11 +218,11 @@ define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load i96, i96 addrspace(4)* %ptr, align 8 + %load = load i96, ptr addrspace(4) %ptr, align 8 ret i96 %load } -define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -239,11 +239,11 @@ define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load 
<3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 ret <3 x i32> %load } -define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) { +define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v6i16_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -266,11 +266,11 @@ define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) { ; GFX7-NEXT: v_mov_b32_e32 v2, v7 ; GFX7-NEXT: v_mov_b32_e32 v4, v8 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 + %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 ret <6 x i16> %load } -define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) { +define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v12i8_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -313,11 +313,11 @@ define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) { ; GFX7-NEXT: v_mov_b32_e32 v1, v13 ; GFX7-NEXT: v_mov_b32_e32 v2, v12 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8 + %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 ret <12 x i8> %load } -define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -334,11 +334,11 @@ define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) { ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 @@ -451,11 +451,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) { ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 @@ -523,11 +523,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x 
i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -545,11 +545,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load } -define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { +define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_i96_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -567,11 +567,11 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load i96, i96 addrspace(4)* %ptr, align 8 + %load = load i96, ptr addrspace(4) %ptr, align 8 ret i96 %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -589,11 +589,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v6i16_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -611,12 +611,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 + %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 %cast = bitcast <6 x i16> %load to <3 x i32> ret <3 x i32> %cast } -define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v12i8_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 @@ -652,16 +652,16 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s12 ; GFX7-NEXT: s_mov_b32 s4, s13 ; GFX7-NEXT: ; return to shader part epilog - %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8 + %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 ret <12 x i8> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: s_load_constant_v3i32_align16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16 + %load = load <3 
x i32>, ptr addrspace(4) %ptr, align 16 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll index f0ed8f07c98e2..44e4320cddb22 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll @@ -35,8 +35,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog entry: - %gep = getelementptr <{ [4294967295 x float] }>, <{ [4294967295 x float] }> addrspace(6)* null, i32 0, i32 0, i32 %arg - %load = load float, float addrspace(6)* %gep, align 4 + %gep = getelementptr <{ [4294967295 x float] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg + %load = load float, ptr addrspace(6) %gep, align 4 ret float %load } @@ -49,13 +49,13 @@ define amdgpu_ps i32 @load_constant32bit_sgpr_offset(i32 inreg %arg) { ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ; return to shader part epilog entry: - %gep = getelementptr <{ [4294967295 x i32] }>, <{ [4294967295 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %arg - %load = load i32, i32 addrspace(6)* %gep, align 4 + %gep = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg + %load = load i32, ptr addrspace(6) %gep, align 4 ret i32 %load } ; This gets split during regbankselect -define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(<8 x float> addrspace(6)* %arg) { +define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %arg) { ; GFX6-LABEL: load_constant32bit_vgpr_v8f32: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -88,6 +88,6 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(<8 x float> addrspac ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog entry: - %load = load <8 x float>, <8 x float> addrspace(6)* %arg, align 32 + %load = load <8 x float>, ptr addrspace(6) %arg, align 32 ret <8 x float> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll index 0d9abad9aa764..6f9045d4cb664 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -38,11 +38,11 @@ define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr + %load = load <4 x i32>, ptr addrspace(3) %ptr ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -245,11 +245,11 @@ define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_or3_b32 v3, v11, v12, v10 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1 + %load 
= load <4 x i32>, ptr addrspace(3) %ptr, align 1 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align2(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -339,11 +339,11 @@ define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_lshl_or_b32 v3, v8, 16, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 2 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align4(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -382,11 +382,11 @@ define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 4 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align8(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -419,11 +419,11 @@ define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 8 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align16(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -454,6 +454,6 @@ define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 16 ret <4 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll index 61d7a9f32c7e3..0bb4db73db640 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -38,11 +38,11 @@ define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr + %load = load <3 x i32>, ptr addrspace(3) %ptr ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> 
@load_lds_v3i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -202,11 +202,11 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_or3_b32 v2, v8, v9, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align2(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -280,11 +280,11 @@ define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_lshl_or_b32 v2, v6, 16, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 2 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align4(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -323,11 +323,11 @@ define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 4 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align8(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -366,11 +366,11 @@ define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 8 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align16(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -401,6 +401,6 @@ define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 16 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll index caaf3d750bba1..0234119c01c67 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -8,7 +8,7 @@ ; LDS alignment enforcement is controlled by a configuration register: ; SH_MEM_CONFIG.alignment_mode -define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -89,11 
+89,11 @@ define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 1 ret <4 x i32> %load } -define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -162,11 +162,11 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1 ret <3 x i32> %load } -define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -225,11 +225,11 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) ; GFX11-NEXT: ds_store_b128 v0, v[1:4] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1 + store <4 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -281,11 +281,11 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) ; GFX11-NEXT: ds_store_b96 v0, v[1:3] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1 + store <3 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_ps void @test_s_load_constant_v8i32_align1(<8 x i32> addrspace(4)* inreg %ptr, <8 x i32> addrspace(1)* inreg %out) { +define amdgpu_ps void @test_s_load_constant_v8i32_align1(ptr addrspace(4) inreg %ptr, ptr addrspace(1) inreg %out) { ; GFX9-LABEL: test_s_load_constant_v8i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v8, 0 @@ -336,7 +336,7 @@ define amdgpu_ps void @test_s_load_constant_v8i32_align1(<8 x i32> addrspace(4)* ; GFX11-NEXT: global_store_b128 v8, v[4:7], s[2:3] offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %load = load <8 x i32>, <8 x i32> addrspace(4)* %ptr, align 1 - store <8 x i32> %load, <8 x i32> addrspace(1)* %out + %load = load <8 x i32>, ptr addrspace(4) %ptr, align 1 + store <8 x i32> %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index 672b4d9445d33..b5fa7de724bf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -64,21 +64,21 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 123, i32 addrspace(1)* undef - store volatile i32 456, i32 addrspace(1)* undef - store volatile i32 999, i32 addrspace(1)* undef - store volatile i32 1000, i32 addrspace(1)* undef - store volatile i32 455, i32 addrspace(1)* undef - store volatile i32 
23526, i32 addrspace(1)* undef + store volatile i32 123, ptr addrspace(1) undef + store volatile i32 456, ptr addrspace(1) undef + store volatile i32 999, ptr addrspace(1) undef + store volatile i32 1000, ptr addrspace(1) undef + store volatile i32 455, ptr addrspace(1) undef + store volatile i32 23526, ptr addrspace(1) undef br label %bb2 bb1: - store volatile i32 23526, i32 addrspace(1)* undef - store volatile i32 455, i32 addrspace(1)* undef - store volatile i32 1000, i32 addrspace(1)* undef - store volatile i32 456, i32 addrspace(1)* undef - store volatile i32 999, i32 addrspace(1)* undef - store volatile i32 123, i32 addrspace(1)* undef + store volatile i32 23526, ptr addrspace(1) undef + store volatile i32 455, ptr addrspace(1) undef + store volatile i32 1000, ptr addrspace(1) undef + store volatile i32 456, ptr addrspace(1) undef + store volatile i32 999, ptr addrspace(1) undef + store volatile i32 123, ptr addrspace(1) undef br label %bb2 bb2: @@ -146,13 +146,13 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* @gv0 - store volatile i32 1, i32 addrspace(1)* @gv1 + store volatile i32 0, ptr addrspace(1) @gv0 + store volatile i32 1, ptr addrspace(1) @gv1 br label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* @gv2 - store volatile i32 1, i32 addrspace(1)* @gv3 + store volatile i32 0, ptr addrspace(1) @gv2 + store volatile i32 1, ptr addrspace(1) @gv3 br label %bb2 bb2: @@ -214,13 +214,13 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* @static.gv0 - store volatile i32 1, i32 addrspace(1)* @static.gv1 + store volatile i32 0, ptr addrspace(1) @static.gv0 + store volatile i32 1, ptr addrspace(1) @static.gv1 br label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* @static.gv2 - store volatile i32 1, i32 addrspace(1)* @static.gv3 + store volatile i32 0, ptr addrspace(1) @static.gv2 + store volatile i32 1, ptr addrspace(1) @static.gv3 br label %bb2 bb2: @@ -228,7 +228,7 @@ bb2: } ; This would crash from using the wrong insert point -define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { +define void @sink_null_insert_pt(ptr addrspace(4) %arg0) { ; GFX9-LABEL: sink_null_insert_pt: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -255,7 +255,7 @@ define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: - %load0 = load volatile i32, i32 addrspace(1)* null, align 4 + %load0 = load volatile i32, ptr addrspace(1) null, align 4 br label %bb1 bb1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll index ff3f4363ee200..ab9ae86c5abcb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s -define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, i32 addrspace(6)* inreg %arg3) { +define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspace(6) inreg %arg3) { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %.entry ; CHECK-NEXT: v_and_b32_e32 v3, 0x3ffffffc, v0 @@ -15,23 +15,23 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, i32 addrspac ; CHECK-NEXT: s_endpgm .entry: %bs1 = and i32 %arg1, 1073741820 - %ep1 = 
getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs1 - %ad1 = ptrtoint i32 addrspace(6)* %ep1 to i32 + %ep1 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs1 + %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0) %bs2 = or i32 %bs1, 1 - %ep2 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs2 - %ad2 = ptrtoint i32 addrspace(6)* %ep2 to i32 + %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2 + %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0) %bs3 = or i32 %bs1, 2 - %ep3 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs3 - %ad3 = ptrtoint i32 addrspace(6)* %ep3 to i32 + %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3 + %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0) %bs4 = or i32 %bs1, 3 - %ep4 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs4 - %ad4 = ptrtoint i32 addrspace(6)* %ep4 to i32 + %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4 + %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0) ret void @@ -50,23 +50,23 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) { ; CHECK-NEXT: s_endpgm .entry: %bs1 = and i32 %arg1, 1073741820 - %ep1 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs1 - %ad1 = ptrtoint i32 addrspace(6)* %ep1 to i32 + %ep1 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs1 + %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0) %bs2 = or i32 %bs1, 1 - %ep2 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs2 - %ad2 = ptrtoint i32 addrspace(6)* %ep2 to i32 + %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2 + %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0) %bs3 = or i32 %bs1, 2 - %ep3 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs3 - %ad3 = ptrtoint i32 addrspace(6)* %ep3 to i32 + %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3 + %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0) %bs4 = or i32 %bs1, 3 - %ep4 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs4 - %ad4 = ptrtoint i32 addrspace(6)* %ep4 to i32 + %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4 + %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll index 7295442324b6f..4e1728a369dc0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -5,7 +5,7 @@ ; Test end to end matching of addressing modes when MUBUF is used for ; global memory. 
-define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -25,11 +25,11 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) { ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm - store i32 0, i32 addrspace(1)* %ptr + store i32 0, ptr addrspace(1) %ptr ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -51,12 +51,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg % ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -84,12 +84,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* i ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -117,12 +117,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* i ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -144,12 +144,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg % ; GFX7-NEXT: s_movk_i32 s4, 0x4000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: 
mubuf_store_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -169,12 +169,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -194,12 +194,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* % ; GFX7-NEXT: s_mov_b32 s2, s0 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -219,12 +219,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* % ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -244,12 +244,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) { ; GFX7-NEXT: s_movk_i32 s4, 0x4000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s5, s4, 31 @@ -277,12 +277,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 
inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -302,12 +302,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -327,13 +327,13 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace( ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256 - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 256 + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -353,13 +353,13 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace( ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256 - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 256 + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %soffset + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -383,12 +383,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -414,12 +414,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 
addrspace ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095 - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 4095 + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -445,13 +445,13 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095 - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 4095 + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %voffset + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -471,11 +471,11 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) { ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %val = load volatile float, float addrspace(1)* %ptr + %val = load volatile float, ptr addrspace(1) %ptr ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -497,12 +497,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -530,12 +530,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float 
@mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -563,12 +563,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -590,12 +590,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -615,12 +615,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -640,12 +640,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -665,12 +665,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return 
to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -690,12 +690,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s5, s4, 31 @@ -723,12 +723,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inre ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -748,12 +748,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -773,13 +773,13 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256 - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %gep1 = getelementptr float, ptr 
addrspace(1) %gep0, i32 256 + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -799,13 +799,13 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspac ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256 - %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 256 + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -829,12 +829,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inre ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -860,12 +860,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspa ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset - %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095 - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -891,13 +891,13 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspa ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %gep1 = 
getelementptr float, float addrspace(1)* %gep0, i32 %voffset - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %voffset + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -926,13 +926,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* in ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -967,13 +967,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace( ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v2, 2 @@ -1002,13 +1002,13 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %p ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1037,13 +1037,13 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace( ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps 
float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1076,13 +1076,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* i ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -1113,14 +1113,14 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inre ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -1157,14 +1157,14 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(ptr addrspace(1) %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v4, v2 @@ -1193,14 +1193,14 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v3 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } 
%result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1229,14 +1229,14 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v3 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v3, v1 @@ -1269,8 +1269,8 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inr ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index bf0758a645eba..fec564f9b45db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -10,7 +10,7 @@ ; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an ; alignment less than the stack alignment. -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { ; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 @@ -56,24 +56,23 @@ entry: bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. 
- store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } ; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 @@ -82,7 +81,7 @@ bb.2: ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 ; ASSUME1024: ; ScratchSize: 1040 -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 @@ -123,19 +122,18 @@ entry: bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } @@ -146,7 +144,7 @@ bb.1: ; ASSUME1024: ; ScratchSize: 1088 -define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { ; GCN-LABEL: func_non_entry_block_static_alloca_align4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -189,28 +187,27 @@ entry: bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. 
- store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } -define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; GCN-LABEL: func_non_entry_block_static_alloca_align64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -250,19 +247,18 @@ entry: bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 3e4b0d4fa1e1d..7b222fa054f12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -21,7 +21,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -40,10 +40,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -67,7 +67,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -87,10 +87,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -155,7 +155,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -216,10 +216,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -285,7 +285,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -347,10 +347,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 8f62f75b97c6f..0d7496cb10e08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -25,7 +25,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -48,10 +48,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -79,7 +79,7 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -103,10 +103,10 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -175,7 +175,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -240,10 +240,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: 
G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -304,7 +304,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -361,10 +361,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -445,7 +445,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -522,10 +522,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index b941a250004eb..f0b75187ad874 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -641,7 +641,7 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -658,10 +658,10 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i96 %val, i96 addrspace(1)* undef + store i96 %val, ptr addrspace(1) undef ret void } @@ -684,10 +684,10 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -706,13 +706,13 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i256 %val, i256 addrspace(1)* undef + store i256 %val, ptr addrspace(1) undef ret void } @@ -737,16 +737,16 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -767,19 +767,19 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES 
[[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i512 %val, i512 addrspace(1)* undef + store i512 %val, ptr addrspace(1) undef ret void } @@ -802,10 +802,10 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -824,13 +824,13 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x 
s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <16 x i16> %val, <16 x i16> addrspace(1)* undef + store <16 x i16> %val, ptr addrspace(1) undef ret void } @@ -855,16 +855,16 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<8 x 
s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -885,19 +885,19 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <32 x i16> %val, <32 x i16> addrspace(1)* undef + store <32 x i16> %val, ptr addrspace(1) undef ret void } @@ -920,10 +920,10 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: 
G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -942,13 +942,13 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <4 x i64> %val, <4 x i64> addrspace(1)* undef + store <4 x i64> %val, ptr addrspace(1) undef ret void } @@ -973,16 +973,16 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 
x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1003,19 +1003,19 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) - store 
<8 x i64> %val, <8 x i64> addrspace(1)* undef + store <8 x i64> %val, ptr addrspace(1) undef ret void } @@ -1038,10 +1038,10 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1060,13 +1060,13 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 - %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32> %rsrc, i32 %soffset, i32 0) + store <4 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -1091,16 +1091,16 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1121,19 +1121,19 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr 
addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 - %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32> %rsrc, i32 %soffset, i32 0) + store <8 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -3161,5 +3161,5 @@ declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) -declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) -declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) +declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg) +declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index b3a3d6394ce03..a922e0b46dfed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @sdivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { +define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { ; GFX8-LABEL: sdivrem_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -136,13 +136,13 @@ define amdgpu_kernel void @sdivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i32 %x, %y - store i32 %div, i32 addrspace(1)* %out0 + store i32 %div, ptr addrspace(1) %out0 %rem = srem i32 %x, %y - store i32 %rem, i32 addrspace(1)* %out1 + store i32 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)* %out1, i64 %x, i64 %y) { +define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr 
addrspace(1) %out1, i64 %x, i64 %y) { ; GFX8-LABEL: sdivrem_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -613,13 +613,13 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv i64 %x, %y - store i64 %div, i64 addrspace(1)* %out0 + store i64 %div, ptr addrspace(1) %out0 %rem = srem i64 %x, %y - store i64 %rem, i64 addrspace(1)* %out1 + store i64 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { ; GFX8-LABEL: sdivrem_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -842,13 +842,13 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32 ; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i32> %x, %y - store <2 x i32> %div, <2 x i32> addrspace(1)* %out0 + store <2 x i32> %div, ptr addrspace(1) %out0 %rem = srem <2 x i32> %x, %y - store <2 x i32> %rem, <2 x i32> addrspace(1)* %out1 + store <2 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { ; GFX8-LABEL: sdivrem_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -1268,13 +1268,13 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32 ; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <4 x i32> %x, %y - store <4 x i32> %div, <4 x i32> addrspace(1)* %out0 + store <4 x i32> %div, ptr addrspace(1) %out0 %rem = srem <4 x i32> %x, %y - store <4 x i32> %rem, <4 x i32> addrspace(1)* %out1 + store <4 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { ; GFX8-LABEL: sdivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -2191,13 +2191,13 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: global_store_dwordx4 v16, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i64> %x, %y - store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 + store <2 x i64> %div, ptr addrspace(1) %out0 %rem = srem <2 x i64> %x, %y - store <2 x i64> %rem, <2 x i64> addrspace(1)* %out1 + store <2 x i64> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { +define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { ; GFX8-LABEL: sdiv_i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2336,13 +2336,13 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i8 %x, %y - store i8 %div, i8 addrspace(1)* %out0 + store i8 %div, ptr addrspace(1) %out0 %rem = srem i8 %x, %y - store i8 %rem, i8 
addrspace(1)* %out1 + store i8 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { +define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: sdivrem_v2i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x10 @@ -2600,13 +2600,13 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10-NEXT: global_store_short v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i8> %x, %y - store <2 x i8> %div, <2 x i8> addrspace(1)* %out0 + store <2 x i8> %div, ptr addrspace(1) %out0 %rem = srem <2 x i8> %x, %y - store <2 x i8> %rem, <2 x i8> addrspace(1)* %out1 + store <2 x i8> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %out1, i16 %x, i16 %y) { +define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { ; GFX8-LABEL: sdiv_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2745,13 +2745,13 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX10-NEXT: global_store_short v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i16 %x, %y - store i16 %div, i16 addrspace(1)* %out0 + store i16 %div, ptr addrspace(1) %out0 %rem = srem i16 %x, %y - store i16 %rem, i16 addrspace(1)* %out1 + store i16 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) { +define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { ; GFX8-LABEL: sdivrem_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 @@ -3003,13 +3003,13 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-NEXT: global_store_dword v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i16> %x, %y - store <2 x i16> %div, <2 x i16> addrspace(1)* %out0 + store <2 x i16> %div, ptr addrspace(1) %out0 %rem = srem <2 x i16> %x, %y - store <2 x i16> %rem, <2 x i16> addrspace(1)* %out1 + store <2 x i16> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* %out1, i3 %x, i3 %y) { +define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { ; GFX8-LABEL: sdivrem_i3: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -3154,13 +3154,13 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i3 %x, %y - store i3 %div, i3 addrspace(1)* %out0 + store i3 %div, ptr addrspace(1) %out0 %rem = srem i3 %x, %y - store i3 %rem, i3 addrspace(1)* %out1 + store i3 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { ; GFX8-LABEL: sdivrem_i27: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -3305,8 +3305,8 @@ define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i27 %x, %y - store i27 %div, i27 addrspace(1)* 
%out0 + store i27 %div, ptr addrspace(1) %out0 %rem = srem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 + store i27 %rem, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll index 8f300f2baccc4..cd74035afee75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -242,7 +242,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) { ret i64 %shl } -define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) { ; GFX7-LABEL: mulu24_shl64: ; GFX7: ; %bb.0: ; %bb ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -319,12 +319,12 @@ bb: %tmp1 = and i32 %tmp, 6 %mulconv = mul nuw nsw i32 %tmp1, 7 %tmp2 = zext i32 %mulconv to i64 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2 - store i32 0, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp2 + store i32 0, ptr addrspace(1) %tmp3, align 4 ret void } -define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) { +define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) { ; GFX7-LABEL: muli24_shl64: ; GFX7: ; %bb.0: ; %bb ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -417,14 +417,14 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = sext i32 %tmp to i64 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2 - %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp2 + %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4 %tmp5 = or i32 %tmp4, -8388608 %tmp6 = mul nsw i32 %tmp5, -7 %tmp7 = zext i32 %tmp6 to i64 %tmp8 = shl nuw nsw i64 %tmp7, 3 - %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2 - store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8 + %tmp9 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %tmp2 + store i64 %tmp8, ptr addrspace(1) %tmp9, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll index 836ce947fbfa2..9bc6189503887 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -9,11 +9,11 @@ ; GCN-LABEL: {{^}}smrd0: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 -define amdgpu_kernel void @smrd0(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd0(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 1 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -21,11 +21,11 @@ entry: ; GCN-LABEL: {{^}}smrd1: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}} ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc -define amdgpu_kernel void @smrd1(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd1(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255 - %1 = load i32, i32 
addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 255 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -36,11 +36,11 @@ entry: ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100 ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 ; GCN: s_endpgm -define amdgpu_kernel void @smrd2(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd2(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 256 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 256 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -51,11 +51,11 @@ entry: ; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b ; TODO: Add VI checks ; XGCN: s_endpgm -define amdgpu_kernel void @smrd3(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd3(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296 ; 2 ^ 32 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 4294967296 ; 2 ^ 32 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -67,11 +67,11 @@ entry: ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc ; GFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] -define amdgpu_kernel void @smrd4(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd4(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 262143 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -81,11 +81,11 @@ entry: ; SIVIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000 ; GCN: s_endpgm -define amdgpu_kernel void @smrd5(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd5(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 262144 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -94,11 +94,11 @@ entry: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], -0x4 -define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: - %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 - %tmp1 = load i32, i32 addrspace(4)* %tmp - store i32 %tmp1, i32 addrspace(1)* %out + %tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1 + %tmp1 = load i32, ptr addrspace(4) %tmp + store i32 %tmp1, ptr addrspace(1) %out ret void } @@ -107,10 +107,10 @@ entry: ; GCN: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, 0xffe00000 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 -define amdgpu_kernel void @smrd7(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @smrd7(ptr 
addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: - %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -524288 - %tmp1 = load i32, i32 addrspace(4)* %tmp - store i32 %tmp1, i32 addrspace(1)* %out + %tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -524288 + %tmp1 = load i32, ptr addrspace(4) %tmp + store i32 %tmp1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll index 63b4bf25e5bf1..b9c7cc3aa3f01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -60,11 +60,11 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_b128 v4, v[0:3] ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out + store <4 x i32> %x, ptr addrspace(3) %out ret void } -define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -286,11 +286,11 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b8 v1, v7 offset:14 ; GFX11-NEXT: ds_store_b8 v1, v8 offset:15 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1 + store <4 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align2(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -401,11 +401,11 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b16 v1, v4 offset:12 ; GFX11-NEXT: ds_store_b16 v1, v8 offset:14 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 2 + store <4 x i32> %x, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align4(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -462,11 +462,11 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_2addr_b32 v1, v3, v4 offset0:2 offset1:3 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 4 + store <4 x i32> %x, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align8(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -520,11 +520,11 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_2addr_b64 v4, v[0:1], v[2:3] offset1:1 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 8 + store <4 x i32> %x, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align16(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -577,6 +577,6 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_b128 v4, v[0:3] ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 16 + store <4 x i32> %x, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll index f2fb1371a8fb1..cfca355e61517 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 @@ -56,11 +56,11 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0 ; GFX11-NEXT: ds_store_b96 v3, v[0:2] ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out + store <3 x i32> %x, ptr addrspace(3) %out ret void } -define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -233,11 +233,11 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b8 v1, v6 offset:10 ; GFX11-NEXT: ds_store_b8 v1, v12 offset:11 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1 + store <3 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align2(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -329,11 +329,11 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b16 v1, v4 offset:8 ; GFX11-NEXT: ds_store_b16 v1, v6 offset:10 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2 + store <3 x i32> %x, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align4(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -386,11 +386,11 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_b32 v1, v3 offset:8 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4 + store <3 x i32> %x, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align8(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -443,11 +443,11 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_b32 v1, v3 offset:8 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8 + store <3 x i32> %x, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 @@ -496,6 +496,6 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0 ; GFX11-NEXT: ds_store_b96 v3, v[0:2] ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16 + store <3 x i32> %x, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 6af44319a502f..9d8f051b83036 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @udivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { +define amdgpu_kernel void @udivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { ; GFX8-LABEL: udivrem_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -103,13 +103,13 @@ define amdgpu_kernel void @udivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i32 %x, %y - store i32 %div, i32 addrspace(1)* %out0 + store i32 %div, ptr addrspace(1) %out0 %rem = urem i32 %x, %y - store i32 %rem, i32 addrspace(1)* %out1 + store i32 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)* %out1, i64 %x, i64 %y) { +define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) { ; GFX8-LABEL: udivrem_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -519,13 +519,13 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: global_store_dwordx2 v7, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv i64 %x, %y - 
store i64 %div, i64 addrspace(1)* %out0 + store i64 %div, ptr addrspace(1) %out0 %rem = urem i64 %x, %y - store i64 %rem, i64 addrspace(1)* %out1 + store i64 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @udivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { ; GFX8-LABEL: udivrem_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -682,13 +682,13 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32 ; GFX10-NEXT: global_store_dwordx2 v8, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i32> %x, %y - store <2 x i32> %div, <2 x i32> addrspace(1)* %out0 + store <2 x i32> %div, ptr addrspace(1) %out0 %rem = urem <2 x i32> %x, %y - store <2 x i32> %rem, <2 x i32> addrspace(1)* %out1 + store <2 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { ; GFX8-LABEL: udivrem_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -976,13 +976,13 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32 ; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <4 x i32> %x, %y - store <4 x i32> %div, <4 x i32> addrspace(1)* %out0 + store <4 x i32> %div, ptr addrspace(1) %out0 %rem = urem <4 x i32> %x, %y - store <4 x i32> %rem, <4 x i32> addrspace(1)* %out1 + store <4 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { ; GFX8-LABEL: udivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -1776,13 +1776,13 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: global_store_dwordx4 v9, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i64> %x, %y - store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 + store <2 x i64> %div, ptr addrspace(1) %out0 %rem = urem <2 x i64> %x, %y - store <2 x i64> %rem, <2 x i64> addrspace(1)* %out1 + store <2 x i64> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { +define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { ; GFX8-LABEL: udiv_i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -1888,13 +1888,13 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i8 %x, %y - store i8 %div, i8 addrspace(1)* %out0 + store i8 %div, ptr addrspace(1) %out0 %rem = urem i8 %x, %y - store i8 %rem, i8 addrspace(1)* %out1 + store i8 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { +define amdgpu_kernel void @udivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: udivrem_v2i8: ; 
GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 @@ -2085,13 +2085,13 @@ define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10-NEXT: global_store_short v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i8> %x, %y - store <2 x i8> %div, <2 x i8> addrspace(1)* %out0 + store <2 x i8> %div, ptr addrspace(1) %out0 %rem = urem <2 x i8> %x, %y - store <2 x i8> %rem, <2 x i8> addrspace(1)* %out1 + store <2 x i8> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %out1, i16 %x, i16 %y) { +define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { ; GFX8-LABEL: udiv_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2197,13 +2197,13 @@ define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX10-NEXT: global_store_short v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i16 %x, %y - store i16 %div, i16 addrspace(1)* %out0 + store i16 %div, ptr addrspace(1) %out0 %rem = urem i16 %x, %y - store i16 %rem, i16 addrspace(1)* %out1 + store i16 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) { +define amdgpu_kernel void @udivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { ; GFX8-LABEL: udivrem_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 @@ -2388,13 +2388,13 @@ define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-NEXT: global_store_dword v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i16> %x, %y - store <2 x i16> %div, <2 x i16> addrspace(1)* %out0 + store <2 x i16> %div, ptr addrspace(1) %out0 %rem = urem <2 x i16> %x, %y - store <2 x i16> %rem, <2 x i16> addrspace(1)* %out1 + store <2 x i16> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* %out1, i3 %x, i3 %y) { +define amdgpu_kernel void @udivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { ; GFX8-LABEL: udivrem_i3: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2506,13 +2506,13 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i3 %x, %y - store i3 %div, i3 addrspace(1)* %out0 + store i3 %div, ptr addrspace(1) %out0 %rem = urem i3 %x, %y - store i3 %rem, i3 addrspace(1)* %out1 + store i3 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +define amdgpu_kernel void @udivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { ; GFX8-LABEL: udivrem_i27: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -2624,8 +2624,8 @@ define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i27 %x, %y - store i27 %div, i27 addrspace(1)* %out0 + store i27 %div, ptr addrspace(1) %out0 %rem = urem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 + store i27 %rem, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll index f81d7128952dc..82fa56f15db35 
100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace (1)* %out, i8 addrspace(4)* %in) #0 { +define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 { ; GFX8-LABEL: constant_load_i8_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -37,12 +37,12 @@ define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace (1)* %out, i8 ad ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %ld = load i8, i8 addrspace(4)* %in, align 4 - store i8 %ld, i8 addrspace(1)* %out, align 4 + %ld = load i8, ptr addrspace(4) %in, align 4 + store i8 %ld, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @constant_load_i16_align4(i16 addrspace (1)* %out, i16 addrspace(4)* %in) #0 { +define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 { ; GFX8-LABEL: constant_load_i16_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -76,12 +76,12 @@ define amdgpu_kernel void @constant_load_i16_align4(i16 addrspace (1)* %out, i16 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_short v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %ld = load i16, i16 addrspace(4)* %in, align 4 - store i16 %ld, i16 addrspace(1)* %out, align 4 + %ld = load i16, ptr addrspace(4) %in, align 4 + store i16 %ld, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: sextload_i8_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -118,13 +118,13 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 4 + %load = load i8, ptr addrspace(1) %in, align 4 %sext = sext i8 %load to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i16_to_i32_align4(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: sextload_i16_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -161,13 +161,13 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(i32 addrspace(1)* %out, i1 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 4 + %load = load i16, ptr addrspace(1) %in, align 4 %sext = sext i16 %load to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @zextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) 
%out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: zextload_i8_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -204,13 +204,13 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 4 + %load = load i8, ptr addrspace(1) %in, align 4 %zext = zext i8 %load to i32 - store i32 %zext, i32 addrspace(1)* %out, align 4 + store i32 %zext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @zextload_i16_to_i32_align4(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: zextload_i16_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -247,13 +247,13 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(i32 addrspace(1)* %out, i1 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 4 + %load = load i16, ptr addrspace(1) %in, align 4 %zext = zext i16 %load to i32 - store i32 %zext, i32 addrspace(1)* %out, align 4 + store i32 %zext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_load_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -286,12 +286,12 @@ define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 - store i8 %load, i8 addrspace(1)* %out, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 + store i8 %load, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_load_i16_align2(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_load_i16_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -324,12 +324,12 @@ define amdgpu_kernel void @constant_load_i16_align2(i16 addrspace(1)* %out, i16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 2 - store i16 %load, i16 addrspace(1)* %out, align 2 + %load = load i16, ptr addrspace(1) %in, align 2 + store i16 %load, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_sextload_i8_align2(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_sextload_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -370,13 +370,13 @@ define amdgpu_kernel void @constant_sextload_i8_align2(i32 addrspace(1)* %out, i ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2 ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 %sextload = sext i8 %load to i32 - store i32 %sextload, i32 addrspace(1)* %out, 
align 2 + store i32 %sextload, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_zextload_i8_align2(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_zextload_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -417,9 +417,9 @@ define amdgpu_kernel void @constant_zextload_i8_align2(i32 addrspace(1)* %out, i ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2 ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 %zextload = zext i8 %load to i32 - store i32 %zextload, i32 addrspace(1)* %out, align 2 + store i32 %zextload, ptr addrspace(1) %out, align 2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll index fe27f45b1585e..e9b94db905267 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s -define i32 @zextload_global_i1_to_i32(i1 addrspace(1)* %ptr) { +define i32 @zextload_global_i1_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i1_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,12 +50,12 @@ define i32 @zextload_global_i1_to_i32(i1 addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i1, i1 addrspace(1)* %ptr + %load = load i1, ptr addrspace(1) %ptr %ext = zext i1 %load to i32 ret i32 %ext } -define i32 @zextload_global_i8_to_i32(i8 addrspace(1)* %ptr) { +define i32 @zextload_global_i8_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i8_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -95,12 +95,12 @@ define i32 @zextload_global_i8_to_i32(i8 addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i8, i8 addrspace(1)* %ptr + %load = load i8, ptr addrspace(1) %ptr %ext = zext i8 %load to i32 ret i32 %ext } -define i32 @zextload_global_i16_to_i32(i16 addrspace(1)* %ptr) { +define i32 @zextload_global_i16_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i16_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -140,12 +140,12 @@ define i32 @zextload_global_i16_to_i32(i16 addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i16, i16 addrspace(1)* %ptr + %load = load i16, ptr addrspace(1) %ptr %ext = zext i16 %load to i32 ret i32 %ext } -define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) { +define i64 @zextload_global_i1_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i1_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -194,12 +194,12 @@ define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i1, i1 addrspace(1)* %ptr + 
%load = load i1, ptr addrspace(1) %ptr %ext = zext i1 %load to i64 ret i64 %ext } -define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) { +define i64 @zextload_global_i8_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i8_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -244,12 +244,12 @@ define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i8, i8 addrspace(1)* %ptr + %load = load i8, ptr addrspace(1) %ptr %ext = zext i8 %load to i64 ret i64 %ext } -define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) { +define i64 @zextload_global_i16_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i16_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -294,12 +294,12 @@ define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i16, i16 addrspace(1)* %ptr + %load = load i16, ptr addrspace(1) %ptr %ext = zext i16 %load to i64 ret i64 %ext } -define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) { +define i64 @zextload_global_i32_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -344,12 +344,12 @@ define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i64 ret i64 %ext } -define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { +define i96 @zextload_global_i32_to_i96(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i96: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -398,12 +398,12 @@ define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i96 ret i96 %ext } -define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { +define i128 @zextload_global_i32_to_i128(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i128: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -457,7 +457,7 @@ define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v3, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i128 ret i128 %ext }